+
+## Introduction
+
+Applio is a powerful voice conversion tool focused on simplicity, quality, and performance. Whether you're an artist, developer, or researcher, Applio offers a straightforward platform for high-quality voice transformations. Its flexible design allows for customization through plugins and configurations, catering to a wide range of projects.
+
+## Getting Started
+
+### 1. Installation
+
+Run the installation script based on your operating system:
+
+- **Windows:** Double-click `run-install.bat`.
+- **Linux/macOS:** Execute `run-install.sh`.
+
+### 2. Running Applio
+
+Start Applio using:
+
+- **Windows:** Double-click `run-applio.bat`.
+- **Linux/macOS:** Run `run-applio.sh`.
+
+This launches the Gradio interface in your default browser (served locally on port 6969 by default).
+
+### 3. Optional: TensorBoard Monitoring
+
+To monitor training or visualize data:
+
+- **Windows:** Run `run-tensorboard.bat`.
+- **Linux/macOS:** Run `run-tensorboard.sh`.
+
+For more detailed instructions, visit the [documentation](https://docs.applio.org).
+
+## Commercial Usage
+
+For commercial use, comply with the [MIT license](./LICENSE) and contact us at support@applio.org to ensure ethical use. Audio files generated with Applio must respect applicable copyright laws. Consider supporting Applio’s development [through a donation](https://ko-fi.com/iahispano).
+
+## References
+
+Applio is made possible thanks to these projects and those cited in their references:
+
+- [gradio-screen-recorder](https://huggingface.co/spaces/gstaff/gradio-screen-recorder) by gstaff
+- [rvc-cli](https://github.com/blaisewf/rvc-cli) by blaisewf
+
+### Contributors
+
+
+
+
diff --git a/app.py b/app.py
index 574b908f220d990da8aa4e4706f7413e7e843c15..7d9e72f17a02583181e82aeadab7d9dad0f3c6ea 100644
--- a/app.py
+++ b/app.py
@@ -1,14 +1,129 @@
import gradio as gr
-import spaces
-import torch
+import sys
+import os
+import logging
-zero = torch.Tensor([0]).cuda()
-print(zero.device) # <-- 'cpu' 🤔
+# Constants
+DEFAULT_PORT = 6969
+MAX_PORT_ATTEMPTS = 10
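+# The launcher tries DEFAULT_PORT first, then successive ports, up to MAX_PORT_ATTEMPTS.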
-@spaces.GPU
-def greet(n):
- print(zero.device) # <-- 'cuda:0' 🤗
- return f"Hello {zero + n} Tensor"
+# Set up logging
+logging.getLogger("uvicorn").setLevel(logging.WARNING)
+logging.getLogger("httpx").setLevel(logging.WARNING)
-demo = gr.Interface(fn=greet, inputs=gr.Number(), outputs=gr.Text())
-demo.launch()
\ No newline at end of file
+# Add current directory to sys.path
+now_dir = os.getcwd()
+sys.path.append(now_dir)
+
+# Import Tabs
+from tabs.inference.inference import inference_tab
+from tabs.train.train import train_tab
+from tabs.extra.extra import extra_tab
+from tabs.report.report import report_tab
+from tabs.download.download import download_tab
+from tabs.tts.tts import tts_tab
+from tabs.voice_blender.voice_blender import voice_blender_tab
+from tabs.plugins.plugins import plugins_tab
+from tabs.settings.version import version_tab
+from tabs.settings.lang import lang_tab
+from tabs.settings.restart import restart_tab
+from tabs.settings.presence import presence_tab, load_config_presence
+from tabs.settings.flask_server import flask_server_tab
+from tabs.settings.fake_gpu import fake_gpu_tab, gpu_available, load_fake_gpu
+from tabs.settings.themes import theme_tab
+from tabs.settings.precision import precision_tab
+
+# Run prerequisites
+from core import run_prerequisites_script
+
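+# Argument order follows core.py's prerequisites flags: pretraineds_v1, pretraineds_v2, models, exe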
+run_prerequisites_script(False, True, True, True)
+
+# Initialize i18n
+from assets.i18n.i18n import I18nAuto
+
+i18n = I18nAuto()
+
+# Start Discord presence if enabled
+if load_config_presence():
+ from assets.discord_presence import RPCManager
+
+ RPCManager.start_presence()
+
+# Check installation
+import assets.installation_checker as installation_checker
+
+installation_checker.check_installation()
+
+# Start Flask server if enabled
+from assets.flask.server import start_flask, load_config_flask
+
+if load_config_flask():
+ print("Starting Flask server")
+ start_flask()
+
+# Load theme
+import assets.themes.loadThemes as loadThemes
+
+my_applio = loadThemes.load_json() or "ParityError/Interstellar"
+
+# Define Gradio interface
+with gr.Blocks(theme=my_applio, title="Applio") as Applio:
+ gr.Markdown("# Applio")
+ gr.Markdown(
+ i18n(
+ "VITS-based Voice Conversion focused on simplicity, quality and performance."
+ )
+ )
+ gr.Markdown(
+ i18n(
+ "[Support](https://discord.gg/IAHispano) — [Discord Bot](https://discord.com/oauth2/authorize?client_id=1144714449563955302&permissions=1376674695271&scope=bot%20applications.commands) — [Find Voices](https://applio.org/models) — [GitHub](https://github.com/IAHispano/Applio)"
+ )
+ )
+ with gr.Tab(i18n("Inference")):
+ inference_tab()
+
+ with gr.Tab(i18n("Train")):
+ if gpu_available() or load_fake_gpu():
+ train_tab()
+ else:
+ gr.Markdown(
+ i18n(
+ "Training is currently unsupported due to the absence of a GPU. To activate the training tab, navigate to the settings tab and enable the 'Fake GPU' option."
+ )
+ )
+
+ with gr.Tab(i18n("TTS")):
+ tts_tab()
+
+ with gr.Tab(i18n("Voice Blender")):
+ voice_blender_tab()
+
+ with gr.Tab(i18n("Plugins")):
+ plugins_tab()
+
+ with gr.Tab(i18n("Download")):
+ download_tab()
+
+ with gr.Tab(i18n("Report a Bug")):
+ report_tab()
+
+ with gr.Tab(i18n("Extra")):
+ extra_tab()
+
+ with gr.Tab(i18n("Settings")):
+ presence_tab()
+ flask_server_tab()
+ precision_tab()
+ if not gpu_available():
+ fake_gpu_tab()
+ theme_tab()
+ version_tab()
+ lang_tab()
+ restart_tab()
+
+
+def launch_gradio(port):
+    Applio.launch(share=True, server_port=port)
+
+
+if __name__ == "__main__":
+    for port in range(DEFAULT_PORT, DEFAULT_PORT + MAX_PORT_ATTEMPTS):
+        try:
+            launch_gradio(port)
+            break
+        except OSError:
+            continue
diff --git a/assets/Applio.ipynb b/assets/Applio.ipynb
new file mode 100644
index 0000000000000000000000000000000000000000..7480fe7310f3de5902666009eb97c0e75d7aa442
--- /dev/null
+++ b/assets/Applio.ipynb
@@ -0,0 +1,452 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "ymhGfgFSR17k"
+ },
+ "source": [
+ "## **Applio**\n",
+ "A simple, high-quality voice conversion tool focused on ease of use and performance. \n",
+ "\n",
+ "[Support](https://discord.gg/IAHispano) — [Discord Bot](https://discord.com/oauth2/authorize?client_id=1144714449563955302&permissions=1376674695271&scope=bot%20applications.commands) — [Find Voices](https://applio.org/models) — [GitHub](https://github.com/IAHispano/Applio)\n",
+ "\n",
+ " \n",
+ "\n",
+ "### **Credits**\n",
+ "- Encryption method: [Hina](https://github.com/hinabl)\n",
+ "- Extra section: [Poopmaster](https://github.com/poiqazwsx)\n",
+ "- Main development: [Applio Team](https://github.com/IAHispano)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "cellView": "form",
+ "id": "vtON700qokuQ"
+ },
+ "outputs": [],
+ "source": [
+ "# @title **Install Applio**\n",
+ "import os\n",
+ "import codecs\n",
+ "import shutil\n",
+ "import tarfile\n",
+ "import subprocess\n",
+ "\n",
+ "from pathlib import Path\n",
+ "from IPython.display import clear_output\n",
+ "\n",
+ "rot_47 = lambda encoded_text: \"\".join(\n",
+ " [\n",
+ " (\n",
+ " chr(\n",
+ " (ord(c) - (ord(\"a\") if c.islower() else ord(\"A\")) - 47) % 26\n",
+ " + (ord(\"a\") if c.islower() else ord(\"A\"))\n",
+ " )\n",
+ " if c.isalpha()\n",
+ " else c\n",
+ " )\n",
+ " for c in encoded_text\n",
+ " ]\n",
+ ")\n",
+ "\n",
+ "org_name = rot_47(\"Vkkgdj\")\n",
+ "new_name = rot_47(\"kmjbmvh_hg\")\n",
+ "uioawhd = rot_47(codecs.decode(\"pbbxa://oqbpcj.kwu/QIPqaxivw/Ixxtqw.oqb\", \"rot_13\"))\n",
+ "uyadwa = codecs.decode(\"ncc.cl\", \"rot_13\")\n",
+ "A = \"/content/\" + rot_47(\"Kikpm.ovm.bu\")\n",
+ "D = \"/\"\n",
+ "\n",
+ "!git clone --depth 1 $uioawhd $new_name --branch 3.2.4 --single-branch\n",
+ "%cd $new_name/\n",
+ "\n",
+ "clear_output()\n",
+ "\n",
+ "def vidal_setup():\n",
+ " A = \"/content/\" + rot_47(\"Kikpm.ovm.bu\")\n",
+ " D = \"/\"\n",
+ " if not os.path.exists(A):\n",
+ " M = os.path.dirname(A)\n",
+ " os.makedirs(M, exist_ok=True)\n",
+ " print(\"No cached install found..\")\n",
+ " try:\n",
+ " N = codecs.decode(\n",
+ " \"uggcf://uhttvatsnpr.pb/VNUvfcnab/Nccyvb/erfbyir/znva/Raivebzrag/Pbyno/Cache.gne.tm\",\n",
+ " \"rot_13\",\n",
+ " )\n",
+ " subprocess.run([\"wget\", \"-O\", A, N])\n",
+ " print(\"Download completed successfully!\")\n",
+ " except Exception as H:\n",
+ " print(str(H))\n",
+ " if os.path.exists(A):\n",
+ " os.remove(A)\n",
+ " if Path(A).exists():\n",
+ " with tarfile.open(A, \"r:gz\") as I:\n",
+ " I.extractall(D)\n",
+ " print(f\"Extraction of {A} to {D} completed.\")\n",
+ " if os.path.exists(A):\n",
+ " os.remove(A)\n",
+ "\n",
+ "vidal_setup()\n",
+ "clear_output()\n",
+ "print(\"Finished installing requirements!\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "cellView": "form",
+ "id": "-7cQtXouqpQi"
+ },
+ "outputs": [],
+ "source": [
+ "# @title **Start Applio**\n",
+ "# @markdown ### Just activate this in case the share link of the gradio dont work\n",
+ "import codecs\n",
+ "import threading\n",
+ "import urllib.request\n",
+ "import time\n",
+ "import ipywidgets as widgets\n",
+ "from IPython.display import display\n",
+ "import os\n",
+ "os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'\n",
+ "!npm install -g localtunnel\n",
+ "new_name = rot_47(\"kmjbmvh_hg\")\n",
+ "%cd $new_name/\n",
+ "share_tunnel = False # @param {type:\"boolean\"}\n",
+ "def start_applio():\n",
+ " if share_tunnel:\n",
+ " !python $uyadwa --listen\n",
+ " else:\n",
+ " !python $uyadwa --listen --share\n",
+ "\n",
+ "%load_ext tensorboard\n",
+ "%reload_ext tensorboard\n",
+ "%tensorboard --logdir logs --bind_all\n",
+ "\n",
+ "if \"autobackups\" not in globals():\n",
+ " autobackups = False\n",
+ "\n",
+ "if autobackups:\n",
+ " thread = threading.Thread(target=backup_files)\n",
+ " thread.start()\n",
+ "\n",
+ "thread_applio = threading.Thread(target=start_applio)\n",
+ "thread_applio.start()\n",
+ "\n",
+ "if share_tunnel:\n",
+ " if not os.path.exists(codecs.decode(\"eip/zbqryf/cergenvarqf/cergenvarq_i2/s0T48x.cgu\", \"rot_13\")):\n",
+ " while not os.path.exists(codecs.decode(\"eip/zbqryf/cergenvarqf/cergenvarq_i2/s0T48x.cgu\", \"rot_13\")):\n",
+ " time.sleep(2)\n",
+ " time.sleep(5)\n",
+ " else:\n",
+ " time.sleep(10)\n",
+ " with open('url.txt', 'w') as file:\n",
+ " file.write('')\n",
+ "\n",
+ " get_ipython().system_raw('lt --port 6969 >> url.txt 2>&1 &')\n",
+ "\n",
+ " time.sleep(4)\n",
+ "\n",
+ " endpoint_ip = urllib.request.urlopen('https://ipv4.icanhazip.com').read().decode('utf8').strip(\"\\n\")\n",
+ "\n",
+ " with open('url.txt', 'r') as file:\n",
+ " tunnel_url = file.read()\n",
+ " tunnel_url = tunnel_url.replace(\"your url is: \", \"\")\n",
+ "\n",
+ " print(f\"Share Link: \\033[0m\\033[93m{tunnel_url}\\033[0m\", end=\"\\033[0m\\n\")\n",
+ "\n",
+ " password_endpoint_widget = widgets.Text(\n",
+ " value=endpoint_ip,\n",
+ " description='Password IP:',\n",
+ " disabled=True\n",
+ " )\n",
+ " display(password_endpoint_widget)\n",
+ "\n",
+ "\n",
+ "\n",
+ "while True:\n",
+ " time.sleep(5)\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "3b59-2x-qEnX"
+ },
+ "source": [
+ "### **Extra**\n",
+ "Enjoy extra options that can make it easier for you to use Applio\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "cellView": "form",
+ "id": "19LNv6iYqF6_"
+ },
+ "outputs": [],
+ "source": [
+ "# @title Mount Drive\n",
+ "# @markdown Mount the files from Google Drive to the Colab.\n",
+ "from google.colab import drive\n",
+ "\n",
+ "drive.mount(\"/content/drive\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "cellView": "form",
+ "id": "I5o6MlpFouiG"
+ },
+ "outputs": [],
+ "source": [
+ "# @title Auto Backup\n",
+ "# @markdown When running it, it will be activated or deactivated previously to start up together with Applio.\n",
+ "LOGS_FOLDER = \"/content/program_ml/logs/\"\n",
+ "GOOGLE_DRIVE_PATH = \"/content/drive/MyDrive/ApplioBackup\"\n",
+ "\n",
+ "if \"autobackups\" not in globals():\n",
+ " autobackups = False\n",
+ "\n",
+ "\n",
+ "def backup_files():\n",
+ " print(\"\\nStarting backup loop...\")\n",
+ " last_backup_timestamps_path = os.path.join(\n",
+ " LOGS_FOLDER, \"last_backup_timestamps.txt\"\n",
+ " )\n",
+ " fully_updated = False\n",
+ "\n",
+ " while True:\n",
+ " try:\n",
+ " updated = False\n",
+ " last_backup_timestamps = {}\n",
+ "\n",
+ " try:\n",
+ " with open(last_backup_timestamps_path, \"r\") as f:\n",
+ " last_backup_timestamps = dict(line.strip().split(\":\") for line in f)\n",
+ " except FileNotFoundError:\n",
+ " pass\n",
+ "\n",
+ " for root, dirs, files in os.walk(LOGS_FOLDER):\n",
+ " # Excluding \"zips\" directory\n",
+ " if \"zips\" in dirs:\n",
+ " dirs.remove(\"zips\")\n",
+ " if \"mute\" in dirs:\n",
+ " dirs.remove(\"mute\")\n",
+ " for filename in files:\n",
+ " if filename != \"last_backup_timestamps.txt\":\n",
+ " filepath = os.path.join(root, filename)\n",
+ " if os.path.isfile(filepath):\n",
+ " backup_filepath = os.path.join(\n",
+ " GOOGLE_DRIVE_PATH,\n",
+ " os.path.relpath(filepath, LOGS_FOLDER),\n",
+ " )\n",
+ " backup_folderpath = os.path.dirname(backup_filepath)\n",
+ " if not os.path.exists(backup_folderpath):\n",
+ " os.makedirs(backup_folderpath)\n",
+ " print(\n",
+ " f\"Created backup folder: {backup_folderpath}\",\n",
+ " flush=True,\n",
+ " )\n",
+ " last_backup_timestamp = last_backup_timestamps.get(filepath)\n",
+ " current_timestamp = os.path.getmtime(filepath)\n",
+ " if (\n",
+ " last_backup_timestamp is None\n",
+ " or float(last_backup_timestamp) < current_timestamp\n",
+ " ):\n",
+ " shutil.copy2(filepath, backup_filepath)\n",
+ " last_backup_timestamps[filepath] = str(\n",
+ " current_timestamp\n",
+ " )\n",
+ " if last_backup_timestamp is None:\n",
+ " print(f\"Backed up file: {filename}\")\n",
+ " else:\n",
+ " print(f\"Updating backed up file: {filename}\")\n",
+ " updated = True\n",
+ " fully_updated = False\n",
+ "\n",
+ " for filepath in list(last_backup_timestamps.keys()):\n",
+ " if not os.path.exists(filepath):\n",
+ " backup_filepath = os.path.join(\n",
+ " GOOGLE_DRIVE_PATH, os.path.relpath(filepath, LOGS_FOLDER)\n",
+ " )\n",
+ " if os.path.exists(backup_filepath):\n",
+ " os.remove(backup_filepath)\n",
+ " print(f\"Deleted file: {filepath}\")\n",
+ " del last_backup_timestamps[filepath]\n",
+ " updated = True\n",
+ " fully_updated = False\n",
+ "\n",
+ " if not updated and not fully_updated:\n",
+ " print(\"Files are up to date.\")\n",
+ " fully_updated = True\n",
+ " sleep_time = 15\n",
+ " else:\n",
+ " sleep_time = 0.1\n",
+ "\n",
+ " with open(last_backup_timestamps_path, \"w\") as f:\n",
+ " for filepath, timestamp in last_backup_timestamps.items():\n",
+ " f.write(f\"{filepath}:{timestamp}\\n\")\n",
+ "\n",
+ " time.sleep(sleep_time)\n",
+ "\n",
+ " except Exception as error:\n",
+ " print(f\"An error occurred during backup: {error}\", flush=True)\n",
+ "\n",
+ "\n",
+ "if autobackups:\n",
+ " autobackups = False\n",
+ " print(\"Autobackup Disabled\")\n",
+ "else:\n",
+ " autobackups = True\n",
+ " print(\"Autobackup Enabled\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# @title Setup new logs folder format\n",
+ "# @markdown Put the exact name you put as your Model Name in Applio.\n",
+ "modelname = \"My-Project\" # @param {type:\"string\"}\n",
+ "logs_folder = f\"/content/program_ml/logs/{modelname}/\"\n",
+ "\n",
+ "import os\n",
+ "\n",
+ "folder_renames = {\n",
+ " \"0_gt_wavs\": \"sliced_audios\",\n",
+ " \"1_16k_wavs\": \"sliced_audios_16k\",\n",
+ " \"2a_f0\": \"f0\",\n",
+ " \"2b-f0nsf\": \"f0_voiced\",\n",
+ " \"3_feature768\": \"v2_extracted\"\n",
+ "}\n",
+ "\n",
+ "def rename_folders(base_path, rename_dict):\n",
+ " for old_name, new_name in rename_dict.items():\n",
+ " old_path = os.path.join(base_path, old_name)\n",
+ " new_path = os.path.join(base_path, new_name)\n",
+ " if os.path.exists(old_path):\n",
+ " os.rename(old_path, new_path)\n",
+ " print(f\"Renamed {old_path} to {new_path}\")\n",
+ " else:\n",
+ " print(f\"Folder {old_path} does not exist\")\n",
+ "\n",
+ "rename_folders(logs_folder, folder_renames)\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "cellView": "form",
+ "id": "ifV_vc4h4Uvx"
+ },
+ "outputs": [],
+ "source": [
+ "# @title Load a Backup\n",
+ "from google.colab import drive\n",
+ "import os\n",
+ "\n",
+ "# @markdown Put the exact name you put as your Model Name in Applio.\n",
+ "modelname = \"My-Project\" # @param {type:\"string\"}\n",
+ "source_path = \"/content/drive/MyDrive/ApplioBackup/\" + modelname\n",
+ "destination_path = \"/content/program_ml/logs/\" + modelname\n",
+ "backup_timestamps_file = \"last_backup_timestamps.txt\"\n",
+ "if not os.path.exists(source_path):\n",
+ " print(\n",
+ " \"The model folder does not exist. Please verify the name is correct or check your Google Drive.\"\n",
+ " )\n",
+ "else:\n",
+ " time_ = os.path.join(\"/content/drive/MyDrive/ApplioBackup/\", backup_timestamps_file)\n",
+ " time__ = os.path.join(\"/content/program_ml/logs/\", backup_timestamps_file)\n",
+ " if os.path.exists(time_):\n",
+ " shutil.copy(time_, time__)\n",
+ " shutil.copytree(source_path, destination_path)\n",
+ " print(\"Model backup loaded successfully.\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "cellView": "form",
+ "id": "leWbhk1X4XoY"
+ },
+ "outputs": [],
+ "source": [
+ "# @title Download all custom pretrains\n",
+ "import os\n",
+ "import urllib.request\n",
+ "\n",
+ "%mkdir /content/program_ml/rvc/models/pretraineds/pretraineds_custom\n",
+ "pretrained_urls = [\n",
+ " # Ov2 Super\n",
+ " \"https://huggingface.co/ORVC/Ov2Super/resolve/main/f0Ov2Super32kG.pth\",\n",
+ " \"https://huggingface.co/ORVC/Ov2Super/resolve/main/f0Ov2Super32kD.pth\",\n",
+ " \"https://huggingface.co/ORVC/Ov2Super/resolve/main/f0Ov2Super40kG.pth\",\n",
+ " \"https://huggingface.co/ORVC/Ov2Super/resolve/main/f0Ov2Super40kD.pth\",\n",
+ "\n",
+ " # TITAN\n",
+ " \"https://huggingface.co/blaise-tk/TITAN/resolve/main/models/medium/40k/pretrained/G-f040k-TITAN-Medium.pth\",\n",
+ " \"https://huggingface.co/blaise-tk/TITAN/resolve/main/models/medium/40k/pretrained/D-f040k-TITAN-Medium.pth\",\n",
+ " \"https://huggingface.co/blaise-tk/TITAN/resolve/main/models/medium/32k/pretrained/G-f032k-TITAN-Medium.pth\",\n",
+ " \"https://huggingface.co/blaise-tk/TITAN/resolve/main/models/medium/32k/pretrained/D-f032k-TITAN-Medium.pth\",\n",
+ "\n",
+ " # Snowie V3\n",
+ " \"https://huggingface.co/MUSTAR/SnowieV3.1-32k/resolve/main/D_SnowieV3.1_32k.pth\",\n",
+ " \"https://huggingface.co/MUSTAR/SnowieV3.1-32k/resolve/main/G_SnowieV3.1_32k.pth\",\n",
+ " \"https://huggingface.co/MUSTAR/SnowieV3.1-40k/resolve/main/G_SnowieV3.1_40k.pth\",\n",
+ " \"https://huggingface.co/MUSTAR/SnowieV3.1-40k/resolve/main/D_SnowieV3.1_40k.pth\",\n",
+ " \"https://huggingface.co/MUSTAR/SnowieV3.1-48k/resolve/main/G_SnowieV3.1_48k.pth\",\n",
+ " \"https://huggingface.co/MUSTAR/SnowieV3.1-48k/resolve/main/D_SnowieV3.1_48k.pth\",\n",
+ "\n",
+ " # RIN E3\n",
+ " \"https://huggingface.co/MUSTAR/RIN_E3/resolve/main/RIN_E3_G.pth\",\n",
+ " \"https://huggingface.co/MUSTAR/RIN_E3/resolve/main/RIN_E3_D.pth\",\n",
+ "\n",
+ " # KLM\n",
+ " \"https://huggingface.co/SeoulStreamingStation/KLM4.1/resolve/main/D_KLM41_32k.pth\",\n",
+ " \"https://huggingface.co/SeoulStreamingStation/KLM4.1/resolve/main/G_KLM41_32k.pth\",\n",
+ " \"https://huggingface.co/SeoulStreamingStation/KLM4.1/resolve/main/D_KLM41_48k.pth\",\n",
+ " \"https://huggingface.co/SeoulStreamingStation/KLM4.1/resolve/main/G_KLM41_48k.pth\",\n",
+ "\n",
+ " # SnowieV3 X RIN_E3\n",
+ " \"https://huggingface.co/MUSTAR/SnowieV3.1-X-RinE3-40K/resolve/main/D_Snowie-X-Rin_40k.pth\",\n",
+ " \"https://huggingface.co/MUSTAR/SnowieV3.1-X-RinE3-40K/resolve/main/G_Snowie-X-Rin_40k.pth\",\n",
+ "]\n",
+ "output_directory = \"/content/program_ml/rvc/models/pretraineds/pretraineds_custom\"\n",
+ "for url in pretrained_urls:\n",
+ " filename = os.path.join(output_directory, os.path.basename(url))\n",
+ " urllib.request.urlretrieve(url, filename)"
+ ]
+ }
+ ],
+ "metadata": {
+ "accelerator": "GPU",
+ "colab": {
+ "collapsed_sections": [
+ "3b59-2x-qEnX"
+ ],
+ "gpuType": "T4",
+ "provenance": []
+ },
+ "kernelspec": {
+ "display_name": "Python 3",
+ "name": "python3"
+ },
+ "language_info": {
+ "name": "python"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 0
+}
diff --git a/assets/Applio_Kaggle.ipynb b/assets/Applio_Kaggle.ipynb
new file mode 100644
index 0000000000000000000000000000000000000000..353ad28f571935455ff609a0bf4962f819fa4a0a
--- /dev/null
+++ b/assets/Applio_Kaggle.ipynb
@@ -0,0 +1 @@
+{"cells":[{"cell_type":"markdown","metadata":{},"source":["## **Applio**\n","A simple, high-quality voice conversion tool focused on ease of use and performance.\n","\n","[Support](https://discord.gg/IAHispano) — [Discord Bot](https://discord.com/oauth2/authorize?client_id=1144714449563955302&permissions=1376674695271&scope=bot%20applications.commands) — [Find Voices](https://applio.org/models) — [GitHub](https://github.com/IAHispano/Applio)\n","\n"," \n","\n","### **Credits**\n","- Encryption method: [Hina](https://github.com/hinabl)\n","- Main development: [Applio Team](https://github.com/IAHispano)"]},{"cell_type":"markdown","metadata":{},"source":["## Install"]},{"cell_type":"code","execution_count":null,"metadata":{"trusted":true},"outputs":[],"source":["import codecs\n","import os\n","import shutil\n","import tarfile\n","import subprocess\n","from pathlib import Path\n","from IPython.display import clear_output, Javascript\n","rot_47 = lambda encoded_text: \"\".join(\n"," [\n"," (\n"," chr(\n"," (ord(c) - (ord(\"a\") if c.islower() else ord(\"A\")) - 47) % 26\n"," + (ord(\"a\") if c.islower() else ord(\"A\"))\n"," )\n"," if c.isalpha()\n"," else c\n"," )\n"," for c in encoded_text\n"," ]\n",")\n","\n","new_name = rot_47(\"kmjbmvh_hg\")\n","findme = rot_47(codecs.decode(\"pbbxa://oqbpcj.kwu/Dqlitvb/qurwg-mtnqvlmz.oqb\", \"rot_13\"))\n","uioawhd = rot_47(codecs.decode(\"pbbxa://oqbpcj.kwu/QIPqaxivw/Ixxtqw.oqb\", \"rot_13\"))\n","!pip install uv\n","!git clone --depth 1 $uioawhd $new_name --branch 3.2.4\n","clear_output()\n","!mkdir -p /kaggle/tmp\n","%cd /kaggle/tmp\n","!uv venv .venv > /dev/null 2>&1\n","def vidal_setup(ForceIn):\n"," def F():\n"," print(\"Installing pip packages...\")\n"," subprocess.check_call([\"uv\", \"pip\", \"install\", \"-r\", \"requirements.txt\", \"--quiet\"])\n","\n"," A = \"/kaggle/working\" + rot_47(\"Kikpm.ovm.bu\")\n"," D = \"/kaggle/tmp\"\n"," if not os.path.exists(A):\n"," M = os.path.dirname(A)\n"," os.makedirs(M, exist_ok=True)\n"," print(\"No cached install found..\")\n"," try:\n"," N = rot_47(codecs.decode(\"pbbxa://pcooqvonikm.kw/QIPqaxivw/Ixxtqw/zmawtdm/uiqv/Mvdqzwumvb/Siootm/SiootmD2.biz.oh?lwevtwil=bzcm\", \"rot_13\"))\n"," subprocess.run([\"wget\",\"-q\" ,\"-O\", A, N])\n"," print(\"Download completed successfully!\")\n"," except Exception as H:\n"," print(str(H))\n"," if os.path.exists(A):\n"," os.remove(A)\n"," if Path(A).exists():\n"," with tarfile.open(A, \"r:gz\") as I:\n"," I.extractall(D)\n"," print(f\"Extraction of {A} to {D} completed.\")\n"," if os.path.exists(A):\n"," os.remove(A)\n"," else:\n"," F()\n","\n","vidal_setup(False)\n","%cd /kaggle/working/program_ml\n","!source /kaggle/tmp/.venv/bin/activate; python core.py \"prerequisites\" --pretraineds_v1 \"False\" --pretraineds_v2 \"True\" --models \"True\" --exe \"False\" > /dev/null 2>&1\n","clear_output()\n","print(\"Finished\")"]},{"cell_type":"markdown","metadata":{},"source":["## Setup Ngrok"]},{"cell_type":"code","execution_count":null,"metadata":{"trusted":true},"outputs":[],"source":["#https://dashboard.ngrok.com/get-started/your-authtoken (Token Ngrok)\n","!pip install pyngrok\n","!ngrok config add-authtoken token"]},{"cell_type":"markdown","metadata":{},"source":["## Start"]},{"cell_type":"code","execution_count":null,"metadata":{"trusted":true},"outputs":[],"source":["from pyngrok import ngrok\n","ngrok.kill()\n","%cd /kaggle/working/program_ml\n","os.system(f\". 
/kaggle/tmp/.venv/bin/activate; imjoy-elfinder --root-dir=/kaggle --port 9876 > /dev/null 2>&1 &\")\n","clear_output()\n","%load_ext tensorboard\n","%tensorboard --logdir logs --port 8077\n","p_tunnel = ngrok.connect(6969)\n","t_tunnel = ngrok.connect(8077)\n","f_tunnel = ngrok.connect(9876)\n","clear_output()\n","print(\"Applio Url:\", p_tunnel.public_url)\n","print(\"Tensorboard Url:\", t_tunnel.public_url)\n","print(\"File Url:\", f_tunnel.public_url)\n","print(\"Save the link for later, this will take a while...\")\n","\n","!source /kaggle/tmp/.venv/bin/activate; python app.py"]},{"cell_type":"markdown","metadata":{"_kg_hide-input":false},"source":["## Extra"]},{"cell_type":"markdown","metadata":{},"source":["## Setup new logs folder format\n","- Put the exact name you put as your Model Name in Applio."]},{"cell_type":"code","execution_count":null,"metadata":{},"outputs":[],"source":["\n","modelname = \"Test\"\n","logs_folder = f\"/kaggle/working/program_ml/logs/\" + modelname\n","\n","import os\n","\n","folder_renames = {\n"," \"0_gt_wavs\": \"sliced_audios\",\n"," \"1_16k_wavs\": \"sliced_audios_16k\",\n"," \"2a_f0\": \"f0\",\n"," \"2b-f0nsf\": \"f0_voiced\",\n"," \"3_feature768\": \"v2_extracted\"\n","}\n","\n","def rename_folders(base_path, rename_dict):\n"," for old_name, new_name in rename_dict.items():\n"," old_path = os.path.join(base_path, old_name)\n"," new_path = os.path.join(base_path, new_name)\n"," if os.path.exists(old_path):\n"," os.rename(old_path, new_path)\n"," print(f\"Renamed {old_path} to {new_path}\")\n"," else:\n"," print(f\"Folder {old_path} does not exist\")\n","\n","rename_folders(logs_folder, folder_renames)\n"]}],"metadata":{"kaggle":{"accelerator":"nvidiaTeslaT4","dataSources":[],"dockerImageVersionId":30558,"isGpuEnabled":true,"isInternetEnabled":true,"language":"python","sourceType":"notebook"},"kernelspec":{"display_name":"Python 3","language":"python","name":"python3"},"language_info":{"codemirror_mode":{"name":"ipython","version":3},"file_extension":".py","mimetype":"text/x-python","name":"python","nbconvert_exporter":"python","pygments_lexer":"ipython3","version":"3.10.12"}},"nbformat":4,"nbformat_minor":4}
diff --git a/assets/Applio_NoUI.ipynb b/assets/Applio_NoUI.ipynb
new file mode 100644
index 0000000000000000000000000000000000000000..a75b62309a074dd2773aba6af07b524a096b244a
--- /dev/null
+++ b/assets/Applio_NoUI.ipynb
@@ -0,0 +1,660 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "0pKllbPyK_BC"
+ },
+ "source": [
+ "## **Applio NoUI**\n",
+ "A simple, high-quality voice conversion tool focused on ease of use and performance. \n",
+ "\n",
+ "[Support](https://discord.gg/IAHispano) — [Discord Bot](https://discord.com/oauth2/authorize?client_id=1144714449563955302&permissions=1376674695271&scope=bot%20applications.commands) — [Find Voices](https://applio.org/models) — [GitHub](https://github.com/IAHispano/Applio)\n",
+ "\n",
+ " \n",
+ "\n",
+ "### **Credits**\n",
+ "- Encryption method: [Hina](https://github.com/hinabl)\n",
+ "- Extra section: [Poopmaster](https://github.com/poiqazwsx)\n",
+ "- Main development: [Applio Team](https://github.com/IAHispano)\n",
+ "- Colab inspired on [RVC v2 Disconnected](https://colab.research.google.com/drive/1XIPCP9ken63S7M6b5ui1b36Cs17sP-NS)."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "Y-iR3WeLMlac"
+ },
+ "source": [
+ "### If you restart the runtime, run it again."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "xwZkZGd-H0zT"
+ },
+ "outputs": [],
+ "source": [
+ "%cd /content/Applio"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "ymMCTSD6m8qV"
+ },
+ "source": [
+ "# Installation\n",
+ "## If the runtime restarts, run the cell above and re-run the installation steps."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "cellView": "form",
+ "id": "yFhAeKGOp9aa"
+ },
+ "outputs": [],
+ "source": [
+ "# @title Mount Google Drive\n",
+ "from google.colab import drive\n",
+ "\n",
+ "drive.mount(\"/content/drive\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "cellView": "form",
+ "id": "7GysECSxBya4"
+ },
+ "outputs": [],
+ "source": [
+ "# @title Clone\n",
+ "!git clone https://github.com/IAHispano/Applio --branch 3.2.4 --single-branch\n",
+ "%cd /content/Applio"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "cellView": "form",
+ "id": "CAXW55BQm0PP"
+ },
+ "outputs": [],
+ "source": [
+ "# @title Install\n",
+ "rot_47 = lambda encoded_text: \"\".join(\n",
+ " [\n",
+ " (\n",
+ " chr(\n",
+ " (ord(c) - (ord(\"a\") if c.islower() else ord(\"A\")) - 47) % 26\n",
+ " + (ord(\"a\") if c.islower() else ord(\"A\"))\n",
+ " )\n",
+ " if c.isalpha()\n",
+ " else c\n",
+ " )\n",
+ " for c in encoded_text\n",
+ " ]\n",
+ ")\n",
+ "import codecs\n",
+ "import os\n",
+ "import tarfile\n",
+ "import subprocess\n",
+ "from pathlib import Path\n",
+ "def vidal_setup(C):\n",
+ " def F():\n",
+ " print(\"Installing pip packages...\")\n",
+ " subprocess.check_call([\"pip\", \"install\", \"-r\", \"requirements.txt\", \"--quiet\"])\n",
+ "\n",
+ " A = \"/content/\" + rot_47(\"Kikpm.ovm.bu\")\n",
+ " D = \"/\"\n",
+ " if not os.path.exists(A):\n",
+ " M = os.path.dirname(A)\n",
+ " os.makedirs(M, exist_ok=True)\n",
+ " print(\"No cached install found..\")\n",
+ " try:\n",
+ " N = codecs.decode(\n",
+ " \"uggcf://uhttvatsnpr.pb/VNUvfcnab/Nccyvb/erfbyir/znva/Raivebzrag/Pbyno/Cache.gne.tm\",\n",
+ " \"rot_13\",\n",
+ " )\n",
+ " subprocess.run([\"wget\", \"-O\", A, N])\n",
+ " print(\"Download completed successfully!\")\n",
+ " except Exception as H:\n",
+ " print(str(H))\n",
+ " if os.path.exists(A):\n",
+ " os.remove(A)\n",
+ " if Path(A).exists():\n",
+ " with tarfile.open(A, \"r:gz\") as I:\n",
+ " I.extractall(D)\n",
+ " print(f\"Extraction of {A} to {D} completed.\")\n",
+ " if os.path.exists(A):\n",
+ " os.remove(A)\n",
+ " if C:\n",
+ " F()\n",
+ " C = False\n",
+ " else:\n",
+ " F()\n",
+ "\n",
+ "\n",
+ "vidal_setup(False)\n",
+ "print(\"Finished installing requirements!\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "cellView": "form",
+ "id": "QlTibPnjmj6-"
+ },
+ "outputs": [],
+ "source": [
+ "# @title Download models\n",
+ "!python core.py prerequisites"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "YzaeMYsUE97Y"
+ },
+ "source": [
+ "# Infer\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "cellView": "form",
+ "id": "v0EgikgjFCjE"
+ },
+ "outputs": [],
+ "source": [
+ "# @title Download model\n",
+ "# @markdown Hugging Face or Google Drive\n",
+ "model_link = \"https://huggingface.co/Darwin/Darwin/resolve/main/Darwin.zip\" # @param {type:\"string\"}\n",
+ "\n",
+ "!python core.py download --model_link \"{model_link}\""
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "cellView": "form",
+ "id": "lrCKEOzvDPRu"
+ },
+ "outputs": [],
+ "source": [
+ "# @title Run Inference\n",
+ "# @markdown Please upload the audio file to your Google Drive path `/content/drive/MyDrive` and specify its name here. For the model name, use the zip file name without the extension. Alternatively, you can check the path `/content/Applio/logs` for the model name (name of the folder).\n",
+ "\n",
+ "import os\n",
+ "\n",
+ "current_dir = os.getcwd()\n",
+ "\n",
+ "model_name = \"Darwin\" # @param {type:\"string\"}\n",
+ "model_folder = os.path.join(current_dir, f\"logs/{model_name}\")\n",
+ "\n",
+ "if not os.path.exists(model_folder):\n",
+ " raise FileNotFoundError(f\"Model directory not found: {model_folder}\")\n",
+ "\n",
+ "files_in_folder = os.listdir(model_folder)\n",
+ "pth_path = next((f for f in files_in_folder if f.endswith(\".pth\")), None)\n",
+ "index_file = next((f for f in files_in_folder if f.endswith(\".index\")), None)\n",
+ "\n",
+ "if pth_path is None or index_file is None:\n",
+ " raise FileNotFoundError(\"No model found.\")\n",
+ "\n",
+ "pth_file = os.path.join(model_folder, pth_path)\n",
+ "index_file = os.path.join(model_folder, index_file)\n",
+ "\n",
+ "input_path = \"/content/example.wav\" # @param {type:\"string\"}\n",
+ "output_path = \"/content/output.wav\"\n",
+ "export_format = \"WAV\" # @param ['WAV', 'MP3', 'FLAC', 'OGG', 'M4A'] {allow-input: false}\n",
+ "f0_method = \"rmvpe\" # @param [\"crepe\", \"crepe-tiny\", \"rmvpe\", \"fcpe\", \"hybrid[rmvpe+fcpe]\"] {allow-input: false}\n",
+ "f0_up_key = 0 # @param {type:\"slider\", min:-24, max:24, step:0}\n",
+ "filter_radius = 3 # @param {type:\"slider\", min:0, max:10, step:0}\n",
+ "rms_mix_rate = 0.8 # @param {type:\"slider\", min:0.0, max:1.0, step:0.1}\n",
+ "protect = 0.5 # @param {type:\"slider\", min:0.0, max:0.5, step:0.1}\n",
+ "index_rate = 0.7 # @param {type:\"slider\", min:0.0, max:1.0, step:0.1}\n",
+ "hop_length = 128 # @param {type:\"slider\", min:1, max:512, step:0}\n",
+ "clean_strength = 0.7 # @param {type:\"slider\", min:0.0, max:1.0, step:0.1}\n",
+ "split_audio = False # @param{type:\"boolean\"}\n",
+ "clean_audio = False # @param{type:\"boolean\"}\n",
+ "f0_autotune = False # @param{type:\"boolean\"}\n",
+ "formant_shift = False # @param{type:\"boolean\"}\n",
+ "formant_qfrency = 1.0 # @param {type:\"slider\", min:1.0, max:16.0, step:0.1}\n",
+ "formant_timbre = 1.0 # @param {type:\"slider\", min:1.0, max:16.0, step:0.1}\n",
+ "\n",
+ "!python core.py infer --pitch \"{f0_up_key}\" --filter_radius \"{filter_radius}\" --volume_envelope \"{rms_mix_rate}\" --index_rate \"{index_rate}\" --hop_length \"{hop_length}\" --protect \"{protect}\" --f0_autotune \"{f0_autotune}\" --f0_method \"{f0_method}\" --input_path \"{input_path}\" --output_path \"{output_path}\" --pth_path \"{pth_file}\" --index_path \"{index_file}\" --split_audio \"{split_audio}\" --clean_audio \"{clean_audio}\" --clean_strength \"{clean_strength}\" --export_format \"{export_format}\" --formant_shifting \"{formant_shift}\" --formant_qfrency \"{formant_qfrency}\" --formant_timbre \"{formant_timbre}\"\n",
+ "\n",
+ "from IPython.display import Audio, display, clear_output\n",
+ "\n",
+ "output_path = output_path.replace(\".wav\", f\".{export_format.lower()}\")\n",
+ "# clear_output()\n",
+ "display(Audio(output_path, autoplay=True))"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "1QkabnLlF2KB"
+ },
+ "source": [
+ "# Train"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "cellView": "form",
+ "id": "oBzqm4JkGGa0"
+ },
+ "outputs": [],
+ "source": [
+ "# @title Preprocess Dataset\n",
+ "import os\n",
+ "os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'\n",
+ "model_name = \"Darwin\" # @param {type:\"string\"}\n",
+ "dataset_path = \"/content/drive/MyDrive/Darwin_Dataset\" # @param {type:\"string\"}\n",
+ "\n",
+ "sample_rate = \"40k\" # @param [\"32k\", \"40k\", \"48k\"] {allow-input: false}\n",
+ "sr = int(sample_rate.rstrip(\"k\")) * 1000\n",
+ "cpu_cores = 2 # @param {type:\"slider\", min:1, max:2, step:1}\n",
+ "cut_preprocess = True # @param{type:\"boolean\"}\n",
+ "\n",
+ "!python core.py preprocess --model_name \"{model_name}\" --dataset_path \"{dataset_path}\" --sample_rate \"{sr}\" --cpu_cores \"{cpu_cores}\" --cut_preprocess \"{cut_preprocess}\""
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "cellView": "form",
+ "id": "zWMiMYfRJTJv"
+ },
+ "outputs": [],
+ "source": [
+ "# @title Extract Features\n",
+ "rvc_version = \"v2\" # @param [\"v2\", \"v1\"] {allow-input: false}\n",
+ "f0_method = \"rmvpe\" # @param [\"crepe\", \"crepe-tiny\", \"rmvpe\"] {allow-input: false}\n",
+ "pitch_guidance = True # @param{type:\"boolean\"}\n",
+ "hop_length = 128 # @param {type:\"slider\", min:1, max:512, step:0}\n",
+ "\n",
+ "sr = int(sample_rate.rstrip(\"k\")) * 1000\n",
+ "cpu_cores = 2 # @param {type:\"slider\", min:1, max:2, step:1}\n",
+ "\n",
+ "!python core.py extract --model_name \"{model_name}\" --rvc_version \"{rvc_version}\" --f0_method \"{f0_method}\" --pitch_guidance \"{pitch_guidance}\" --hop_length \"{hop_length}\" --sample_rate \"{sr}\" --cpu_cores \"{cpu_cores}\" --gpu \"0\""
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "cellView": "form",
+ "id": "TI6LLdIzKAIa"
+ },
+ "outputs": [],
+ "source": [
+ "# @title Train\n",
+ "# @markdown ### ➡️ Model Information\n",
+ "import threading\n",
+ "import time\n",
+ "import os\n",
+ "import shutil\n",
+ "import hashlib\n",
+ "import time\n",
+ "\n",
+ "LOGS_FOLDER = \"/content/Applio/logs/\"\n",
+ "GOOGLE_DRIVE_PATH = \"/content/drive/MyDrive/RVC_Backup\"\n",
+ "\n",
+ "\n",
+ "def import_google_drive_backup():\n",
+ " print(\"Importing Google Drive backup...\")\n",
+ " for root, dirs, files in os.walk(GOOGLE_DRIVE_PATH):\n",
+ " for filename in files:\n",
+ " filepath = os.path.join(root, filename)\n",
+ " if os.path.isfile(filepath):\n",
+ " backup_filepath = os.path.join(\n",
+ " LOGS_FOLDER, os.path.relpath(filepath, GOOGLE_DRIVE_PATH)\n",
+ " )\n",
+ " backup_folderpath = os.path.dirname(backup_filepath)\n",
+ " if not os.path.exists(backup_folderpath):\n",
+ " os.makedirs(backup_folderpath)\n",
+ " print(f\"Created backup folder: {backup_folderpath}\", flush=True)\n",
+ " shutil.copy2(filepath, backup_filepath)\n",
+ " print(f\"Imported file from Google Drive backup: {filename}\")\n",
+ " print(\"Google Drive backup import completed.\")\n",
+ "\n",
+ "\n",
+ "def get_md5_hash(file_path):\n",
+ " hash_md5 = hashlib.md5()\n",
+ " with open(file_path, \"rb\") as f:\n",
+ " for chunk in iter(lambda: f.read(4096), b\"\"):\n",
+ " hash_md5.update(chunk)\n",
+ " return hash_md5.hexdigest()\n",
+ "\n",
+ "\n",
+ "if \"autobackups\" not in globals():\n",
+ " autobackups = False\n",
+ "\n",
+ "\n",
+ "def backup_files():\n",
+ " print(\"\\nStarting backup loop...\")\n",
+ " last_backup_timestamps_path = os.path.join(\n",
+ " LOGS_FOLDER, \"last_backup_timestamps.txt\"\n",
+ " )\n",
+ " fully_updated = False\n",
+ "\n",
+ " while True:\n",
+ " try:\n",
+ " updated = False\n",
+ " last_backup_timestamps = {}\n",
+ "\n",
+ " try:\n",
+ " with open(last_backup_timestamps_path, \"r\") as f:\n",
+ " last_backup_timestamps = dict(line.strip().split(\":\") for line in f)\n",
+ " except FileNotFoundError:\n",
+ " pass\n",
+ "\n",
+ " for root, dirs, files in os.walk(LOGS_FOLDER):\n",
+ " if \"zips\" in dirs:\n",
+ " dirs.remove(\"zips\")\n",
+ " if \"mute\" in dirs:\n",
+ " dirs.remove(\"mute\")\n",
+ " for filename in files:\n",
+ " if filename != \"last_backup_timestamps.txt\":\n",
+ " filepath = os.path.join(root, filename)\n",
+ " if os.path.isfile(filepath):\n",
+ " backup_filepath = os.path.join(\n",
+ " GOOGLE_DRIVE_PATH,\n",
+ " os.path.relpath(filepath, LOGS_FOLDER),\n",
+ " )\n",
+ " backup_folderpath = os.path.dirname(backup_filepath)\n",
+ " if not os.path.exists(backup_folderpath):\n",
+ " os.makedirs(backup_folderpath)\n",
+ " print(\n",
+ " f\"Created backup folder: {backup_folderpath}\",\n",
+ " flush=True,\n",
+ " )\n",
+ " last_backup_timestamp = last_backup_timestamps.get(filepath)\n",
+ " current_timestamp = os.path.getmtime(filepath)\n",
+ " if (\n",
+ " last_backup_timestamp is None\n",
+ " or float(last_backup_timestamp) < current_timestamp\n",
+ " ):\n",
+ " shutil.copy2(filepath, backup_filepath)\n",
+ " last_backup_timestamps[filepath] = str(\n",
+ " current_timestamp\n",
+ " )\n",
+ " if last_backup_timestamp is None:\n",
+ " print(f\"Backed up file: {filename}\")\n",
+ " else:\n",
+ " print(f\"Updating backed up file: {filename}\")\n",
+ " updated = True\n",
+ " fully_updated = False\n",
+ "\n",
+ " for filepath in list(last_backup_timestamps.keys()):\n",
+ " if not os.path.exists(filepath):\n",
+ " backup_filepath = os.path.join(\n",
+ " GOOGLE_DRIVE_PATH, os.path.relpath(filepath, LOGS_FOLDER)\n",
+ " )\n",
+ " if os.path.exists(backup_filepath):\n",
+ " os.remove(backup_filepath)\n",
+ " print(f\"Deleted file: {filepath}\")\n",
+ " del last_backup_timestamps[filepath]\n",
+ " updated = True\n",
+ " fully_updated = False\n",
+ "\n",
+ " if not updated and not fully_updated:\n",
+ " print(\"Files are up to date.\")\n",
+ " fully_updated = True\n",
+ " sleep_time = 15\n",
+ " else:\n",
+ " sleep_time = 0.1\n",
+ "\n",
+ " with open(last_backup_timestamps_path, \"w\") as f:\n",
+ " for filepath, timestamp in last_backup_timestamps.items():\n",
+ " f.write(f\"{filepath}:{timestamp}\\n\")\n",
+ "\n",
+ " time.sleep(sleep_time)\n",
+ "\n",
+ " except Exception as error:\n",
+ " print(f\"An error occurred during backup: {str(error)}\")\n",
+ "\n",
+ "\n",
+ "if autobackups:\n",
+ " autobackups = False\n",
+ " print(\"Autobackup Disabled\")\n",
+ "else:\n",
+ " autobackups = True\n",
+ " print(\"Autobackup Enabled\") \n",
+ "\n",
+ "total_epoch = 800 # @param {type:\"integer\"}\n",
+ "batch_size = 15 # @param {type:\"slider\", min:1, max:25, step:0}\n",
+ "gpu = 0\n",
+ "sr = int(sample_rate.rstrip(\"k\")) * 1000\n",
+ "pitch_guidance = True # @param{type:\"boolean\"}\n",
+ "auto_backups = True # @param{type:\"boolean\"}\n",
+ "pretrained = True # @param{type:\"boolean\"}\n",
+ "sync_graph = False # @param{type:\"boolean\"}\n",
+ "cache_data_in_gpu = False # @param{type:\"boolean\"}\n",
+ "tensorboard = True # @param{type:\"boolean\"}\n",
+ "# @markdown ### ➡️ Choose how many epochs your model will be stored\n",
+ "save_every_epoch = 10 # @param {type:\"slider\", min:1, max:100, step:0}\n",
+ "save_only_latest = False # @param{type:\"boolean\"}\n",
+ "save_every_weights = False # @param{type:\"boolean\"}\n",
+ "overtraining_detector = False # @param{type:\"boolean\"}\n",
+ "overtraining_threshold = 50 # @param {type:\"slider\", min:1, max:100, step:0}\n",
+ "# @markdown ### ❓ Optional\n",
+ "# @markdown In case you select custom pretrained, you will have to download the pretraineds and enter the path of the pretraineds.\n",
+ "custom_pretrained = False # @param{type:\"boolean\"}\n",
+ "g_pretrained_path = \"/content/Applio/rvc/models/pretraineds/pretraineds_custom/G48k.pth\" # @param {type:\"string\"}\n",
+ "d_pretrained_path = \"/content/Applio/rvc/models/pretraineds/pretraineds_custom/D48k.pth\" # @param {type:\"string\"}\n",
+ "\n",
+ "if \"pretrained\" not in globals():\n",
+ " pretrained = True\n",
+ "\n",
+ "if \"custom_pretrained\" not in globals():\n",
+ " custom_pretrained = False\n",
+ "\n",
+ "if \"g_pretrained_path\" not in globals():\n",
+ " g_pretrained_path = \"Custom Path\"\n",
+ "\n",
+ "if \"d_pretrained_path\" not in globals():\n",
+ " d_pretrained_path = \"Custom Path\"\n",
+ "\n",
+ "\n",
+ "def start_train():\n",
+ " if tensorboard == True:\n",
+ " %load_ext tensorboard\n",
+ " %tensorboard --logdir /content/Applio/logs/\n",
+ " !python core.py train --model_name \"{model_name}\" --rvc_version \"{rvc_version}\" --save_every_epoch \"{save_every_epoch}\" --save_only_latest \"{save_only_latest}\" --save_every_weights \"{save_every_weights}\" --total_epoch \"{total_epoch}\" --sample_rate \"{sr}\" --batch_size \"{batch_size}\" --gpu \"{gpu}\" --pitch_guidance \"{pitch_guidance}\" --pretrained \"{pretrained}\" --custom_pretrained \"{custom_pretrained}\" --g_pretrained_path \"{g_pretrained_path}\" --d_pretrained_path \"{d_pretrained_path}\" --overtraining_detector \"{overtraining_detector}\" --overtraining_threshold \"{overtraining_threshold}\" --sync_graph \"{sync_graph}\" --cache_data_in_gpu \"{cache_data_in_gpu}\"\n",
+ "\n",
+ "\n",
+ "server_thread = threading.Thread(target=start_train)\n",
+ "server_thread.start()\n",
+ "\n",
+ "if auto_backups:\n",
+ " backup_files()\n",
+ "else:\n",
+ " while True:\n",
+ " time.sleep(10)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "cellView": "form",
+ "id": "bHLs5AT4Q1ck"
+ },
+ "outputs": [],
+ "source": [
+ "# @title Generate index file\n",
+ "index_algorithm = \"Auto\" # @param [\"Auto\", \"Faiss\", \"KMeans\"] {allow-input: false}\n",
+ "!python core.py index --model_name \"{model_name}\" --rvc_version \"{rvc_version}\" --index_algorithm \"{index_algorithm}\""
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "cellView": "form",
+ "id": "X_eU_SoiHIQg"
+ },
+ "outputs": [],
+ "source": [
+ "# @title Save model\n",
+ "# @markdown Enter the name of the model and the steps. You can find it in your `/content/Applio/logs` folder.\n",
+ "%cd /content\n",
+ "import os, shutil, sys\n",
+ "\n",
+ "model_name = \"Darwin\" # @param {type:\"string\"}\n",
+ "model_epoch = 800 # @param {type:\"integer\"}\n",
+ "save_big_file = False # @param {type:\"boolean\"}\n",
+ "\n",
+ "if os.path.exists(\"/content/zips\"):\n",
+ " shutil.rmtree(\"/content/zips\")\n",
+ "print(\"Removed zips.\")\n",
+ "\n",
+ "os.makedirs(f\"/content/zips/{model_name}/\", exist_ok=True)\n",
+ "print(\"Created zips.\")\n",
+ "\n",
+ "logs_folder = f\"/content/Applio/logs/{model_name}/\"\n",
+ "weight_file = None\n",
+ "if not os.path.exists(logs_folder):\n",
+ " print(f\"Model folder not found.\")\n",
+ " sys.exit(\"\")\n",
+ "\n",
+ "for filename in os.listdir(logs_folder):\n",
+ " if filename.startswith(f\"{model_name}_{model_epoch}e\") and filename.endswith(\".pth\"):\n",
+ " weight_file = filename\n",
+ " break\n",
+ "if weight_file is None:\n",
+ " print(\"There is no weight file with that name\")\n",
+ " sys.exit(\"\")\n",
+ "if not save_big_file:\n",
+ " !cp {logs_folder}added_*.index /content/zips/{model_name}/\n",
+ " !cp {logs_folder}total_*.npy /content/zips/{model_name}/\n",
+ " !cp {logs_folder}{weight_file} /content/zips/{model_name}/\n",
+ " %cd /content/zips\n",
+ " !zip -r {model_name}.zip {model_name}\n",
+ "if save_big_file:\n",
+ " %cd /content/Applio\n",
+ " latest_steps = -1\n",
+ " logs_folder = \"./logs/\" + model_name\n",
+ " for filename in os.listdir(logs_folder):\n",
+ " if filename.startswith(\"G_\") and filename.endswith(\".pth\"):\n",
+ " steps = int(filename.split(\"_\")[1].split(\".\")[0])\n",
+ " if steps > latest_steps:\n",
+ " latest_steps = steps\n",
+ " MODELZIP = model_name + \".zip\"\n",
+ " !mkdir -p /content/zips\n",
+ " ZIPFILEPATH = os.path.join(\"/content/zips\", MODELZIP)\n",
+ " for filename in os.listdir(logs_folder):\n",
+ " if \"G_\" in filename or \"D_\" in filename:\n",
+ " if str(latest_steps) in filename:\n",
+ " !zip -r {ZIPFILEPATH} {os.path.join(logs_folder, filename)}\n",
+ " else:\n",
+ " !zip -r {ZIPFILEPATH} {os.path.join(logs_folder, filename)}\n",
+ "\n",
+ "!mkdir -p /content/drive/MyDrive/RVC_Backup/\n",
+ "shutil.move(\n",
+ " f\"/content/zips/{model_name}.zip\",\n",
+ " f\"/content/drive/MyDrive/RVC_Backup/{model_name}.zip\",\n",
+ ")\n",
+ "%cd /content/Applio\n",
+ "shutil.rmtree(\"/content/zips\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "OaKoymXsyEYN"
+ },
+ "source": [
+ "# Resume-training"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "cellView": "form",
+ "id": "d3KgLAYnyHkP"
+ },
+ "outputs": [],
+ "source": [
+ "# @title Load a Backup\n",
+ "from google.colab import drive\n",
+ "import os\n",
+ "import shutil\n",
+ "\n",
+ "# @markdown Put the exact name you put as your Model Name in Applio.\n",
+ "modelname = \"My-Project\" # @param {type:\"string\"}\n",
+ "source_path = \"/content/drive/MyDrive/RVC_Backup/\" + modelname\n",
+ "destination_path = \"/content/Applio/logs/\" + modelname\n",
+ "backup_timestamps_file = \"last_backup_timestamps.txt\"\n",
+ "if not os.path.exists(source_path):\n",
+ " print(\n",
+ " \"The model folder does not exist. Please verify the name is correct or check your Google Drive.\"\n",
+ " )\n",
+ "else:\n",
+ " time_ = os.path.join(\"/content/drive/MyDrive/RVC_Backup/\", backup_timestamps_file)\n",
+ " time__ = os.path.join(\"/content/Applio/logs/\", backup_timestamps_file)\n",
+ " if os.path.exists(time_):\n",
+ " shutil.copy(time_, time__)\n",
+ " shutil.copytree(source_path, destination_path)\n",
+ " print(\"Model backup loaded successfully.\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "cellView": "form",
+ "id": "sc9DzvRCyJ2d"
+ },
+ "outputs": [],
+ "source": [
+ "# @title Set training variables\n",
+ "# @markdown ### ➡️ Use the same as you did previously\n",
+ "model_name = \"Darwin\" # @param {type:\"string\"}\n",
+ "sample_rate = \"40k\" # @param [\"32k\", \"40k\", \"48k\"] {allow-input: false}\n",
+ "rvc_version = \"v2\" # @param [\"v2\", \"v1\"] {allow-input: false}\n",
+ "f0_method = \"rmvpe\" # @param [\"crepe\", \"crepe-tiny\", \"rmvpe\"] {allow-input: false}\n",
+ "hop_length = 128 # @param {type:\"slider\", min:1, max:512, step:0}\n",
+ "sr = int(sample_rate.rstrip(\"k\")) * 1000"
+ ]
+ }
+ ],
+ "metadata": {
+ "accelerator": "GPU",
+ "colab": {
+ "collapsed_sections": [
+ "ymMCTSD6m8qV"
+ ],
+ "provenance": [],
+ "toc_visible": true
+ },
+ "kernelspec": {
+ "display_name": "Python 3",
+ "name": "python3"
+ },
+ "language_info": {
+ "name": "python"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 0
+}
diff --git a/assets/ICON.ico b/assets/ICON.ico
new file mode 100644
index 0000000000000000000000000000000000000000..340358a598d8a110c798431c8ca99bd580099b02
Binary files /dev/null and b/assets/ICON.ico differ
diff --git a/assets/audios/audio-others/.gitkeep b/assets/audios/audio-others/.gitkeep
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/assets/config.json b/assets/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..a53213c4a3ed07e38add965a83b180dd1f22dd15
--- /dev/null
+++ b/assets/config.json
@@ -0,0 +1,16 @@
+{
+ "theme": {
+ "file": "Applio.py",
+ "class": "Applio"
+ },
+ "plugins": [],
+ "discord_presence": true,
+ "lang": {
+ "override": false,
+ "selected_lang": "en_US"
+ },
+ "flask_server": false,
+ "version": "3.2.5",
+ "fake_gpu": false,
+ "model_author": "None"
+}
\ No newline at end of file
diff --git a/assets/discord_presence.py b/assets/discord_presence.py
new file mode 100644
index 0000000000000000000000000000000000000000..5600487b97be66e3cbcec9a923b254577e8350a6
--- /dev/null
+++ b/assets/discord_presence.py
@@ -0,0 +1,49 @@
+from pypresence import Presence
+import datetime as dt
+import time
+
+
+class RichPresenceManager:
+ def __init__(self):
+ self.client_id = "1144714449563955302"
+ self.rpc = None
+ self.running = False
+
+ def start_presence(self):
+ if not self.running:
+ self.running = True
+ self.rpc = Presence(self.client_id)
+ try:
+ self.rpc.connect()
+ self.update_presence()
+ except KeyboardInterrupt as error:
+ print(error)
+ self.rpc = None
+ self.running = False
+ except Exception as error:
+ print(f"An error occurred connecting to Discord: {error}")
+ self.rpc = None
+ self.running = False
+
+ def update_presence(self):
+ if self.rpc:
+ self.rpc.update(
+ state="applio.org",
+ details="Open ecosystem for voice cloning",
+ buttons=[
+ {"label": "Home", "url": "https://applio.org"},
+ {"label": "Download", "url": "https://applio.org/download"},
+ ],
+ large_image="logo",
+ large_text="Experimenting with applio",
+ start=dt.datetime.now().timestamp(),
+ )
+
+ def stop_presence(self):
+ self.running = False
+ if self.rpc:
+ self.rpc.close()
+ self.rpc = None
+
+
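+# Module-level singleton; app.py starts it via RPCManager.start_presence() when presence is enabled.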
+RPCManager = RichPresenceManager()
diff --git a/assets/flask/routes.py b/assets/flask/routes.py
new file mode 100644
index 0000000000000000000000000000000000000000..cf5171bef165cc779a2b761ecb98cd2930335707
--- /dev/null
+++ b/assets/flask/routes.py
@@ -0,0 +1,32 @@
+import os, sys
+import signal
+from flask import Flask, request, redirect
+
+now_dir = os.getcwd()
+sys.path.append(now_dir)
+
+from core import run_download_script
+
+app = Flask(__name__)
+
+
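+# Lets a browser (e.g. applio.org model links) trigger a local download, then redirects back to the site.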
+@app.route("/download/", methods=["GET"])
+def download(url):
+ file_path = run_download_script(url)
+ if file_path == "Model downloaded successfully.":
+ if "text/html" in request.headers.get("Accept", ""):
+ return redirect("https://applio.org/models/downloaded", code=302)
+ else:
+ return ""
+ else:
+ return "Error: Unable to download file", 500
+
+
+@app.route("/shutdown", methods=["POST"])
+def shutdown():
+ print("This Flask server is shutting down... Please close the window!")
+ os.kill(os.getpid(), signal.SIGTERM)
+
+
+if __name__ == "__main__":
+ app.run(host="localhost", port=8000)
diff --git a/assets/flask/server.py b/assets/flask/server.py
new file mode 100644
index 0000000000000000000000000000000000000000..13637c11c6c066d6ba24bec22ea9250903833a05
--- /dev/null
+++ b/assets/flask/server.py
@@ -0,0 +1,60 @@
+import os
+import socket
+import subprocess
+import time
+import requests
+import json
+
+# Constants
+NOW_DIR = os.getcwd()
+CONFIG_FILE = os.path.join(NOW_DIR, "assets", "config.json")
+ENV_PATH = os.path.join(NOW_DIR, "env", "python.exe")
+FLASK_SCRIPT_PATH = os.path.join(NOW_DIR, "assets", "flask", "routes.py")
+HOST = "localhost"
+PORT = 8000
+TIMEOUT = 2
+
+
+# Functions
+def start_flask():
+ """
+ Starts the Flask server if it's not already running.
+ """
+ try:
+ # Check if Flask server is already running
+ with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock:
+ sock.settimeout(TIMEOUT)
+ sock.connect((HOST, PORT))
+ print("Flask server is already running. Trying to restart it.")
+ requests.post("http://localhost:8000/shutdown")
+ time.sleep(3)
+
+ except socket.timeout:
+ # Start the Flask server
+ try:
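+            # Windows-specific launch: uses the bundled env\python.exe and opens a new console window.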
+ subprocess.Popen(
+ [ENV_PATH, FLASK_SCRIPT_PATH],
+ creationflags=subprocess.CREATE_NEW_CONSOLE,
+ )
+ except Exception as error:
+ print(f"An error occurred starting the Flask server: {error}")
+
+
+def load_config_flask():
+ """
+ Loads the Flask server configuration from the config.json file.
+ """
+ with open(CONFIG_FILE, "r") as file:
+ config = json.load(file)
+ return config["flask_server"]
+
+
+def save_config(value):
+ """
+ Saves the Flask server configuration to the config.json file.
+ """
+ with open(CONFIG_FILE, "r", encoding="utf8") as file:
+ config = json.load(file)
+ config["flask_server"] = value
+ with open(CONFIG_FILE, "w", encoding="utf8") as file:
+ json.dump(config, file, indent=2)
diff --git a/assets/formant_shift/f2m.json b/assets/formant_shift/f2m.json
new file mode 100644
index 0000000000000000000000000000000000000000..895b95326353849269a45342711f0e5e9fae4269
--- /dev/null
+++ b/assets/formant_shift/f2m.json
@@ -0,0 +1,4 @@
+{
+ "formant_qfrency": 1.0,
+ "formant_timbre": 0.8
+}
diff --git a/assets/formant_shift/m2f.json b/assets/formant_shift/m2f.json
new file mode 100644
index 0000000000000000000000000000000000000000..24a2274c02846b4990f22eed3c1704100106abf0
--- /dev/null
+++ b/assets/formant_shift/m2f.json
@@ -0,0 +1,4 @@
+{
+ "formant_qfrency": 1.0,
+ "formant_timbre": 1.2
+}
diff --git a/assets/formant_shift/random.json b/assets/formant_shift/random.json
new file mode 100644
index 0000000000000000000000000000000000000000..f673d915c52acd26f1b9c639a0a6089703a8721f
--- /dev/null
+++ b/assets/formant_shift/random.json
@@ -0,0 +1,4 @@
+{
+ "formant_qfrency": 32.0,
+ "formant_timbre": 9.8
+}
diff --git a/assets/i18n/i18n.py b/assets/i18n/i18n.py
new file mode 100644
index 0000000000000000000000000000000000000000..295dc0d757fda726f72a5782fe2fbc5c9b728fbf
--- /dev/null
+++ b/assets/i18n/i18n.py
@@ -0,0 +1,52 @@
+import os, sys
+import json
+from pathlib import Path
+from locale import getdefaultlocale
+
+now_dir = os.getcwd()
+sys.path.append(now_dir)
+
+
+class I18nAuto:
+ LANGUAGE_PATH = os.path.join(now_dir, "assets", "i18n", "languages")
+
+ def __init__(self, language=None):
+ with open(
+ os.path.join(now_dir, "assets", "config.json"), "r", encoding="utf8"
+ ) as file:
+ config = json.load(file)
+ override = config["lang"]["override"]
+ lang_prefix = config["lang"]["selected_lang"]
+
+ self.language = lang_prefix
+
+ if override == False:
+ language = language or getdefaultlocale()[0]
+ lang_prefix = language[:2] if language is not None else "en"
+ available_languages = self._get_available_languages()
+ matching_languages = [
+ lang for lang in available_languages if lang.startswith(lang_prefix)
+ ]
+ self.language = matching_languages[0] if matching_languages else "en_US"
+
+ self.language_map = self._load_language_list()
+
+ def _load_language_list(self):
+ try:
+ file_path = Path(self.LANGUAGE_PATH) / f"{self.language}.json"
+ with open(file_path, "r", encoding="utf-8") as file:
+ return json.load(file)
+ except FileNotFoundError:
+ raise FileNotFoundError(
+ f"Failed to load language file for {self.language}. Check if the correct .json file exists."
+ )
+
+ def _get_available_languages(self):
+ language_files = [path.stem for path in Path(self.LANGUAGE_PATH).glob("*.json")]
+ return language_files
+
+ def _language_exists(self, language):
+ return (Path(self.LANGUAGE_PATH) / f"{language}.json").exists()
+
+ def __call__(self, key):
+ return self.language_map.get(key, key)
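+
+
+# Usage sketch (illustrative): instances are callable; lookups fall back to
+# the key itself when the current language file has no entry.
+#   i18n = I18nAuto()
+#   print(i18n("Output Information"))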
diff --git a/assets/i18n/languages/ar_AR.json b/assets/i18n/languages/ar_AR.json
new file mode 100644
index 0000000000000000000000000000000000000000..ee9d7cce4664d2d8ac83d460e5bb257a1df2e218
--- /dev/null
+++ b/assets/i18n/languages/ar_AR.json
@@ -0,0 +1,175 @@
+{
+ "VITS-based Voice Conversion focused on simplicity, quality and performance.": "أداة استنساخ الصوت النهائية ، محسنة بدقة للحصول على قوة لا مثيل لها ، ونمطية ، وتجربة سهلة الاستخدام.",
+ "This section contains some extra utilities that often may be in experimental phases.": "يحتوي هذا القسم على بعض الأدوات المساعدة الإضافية التي قد تكون غالبا في المراحل التجريبية.",
+ "Output Information": "معلومات الإخراج",
+ "The output information will be displayed here.": "سيتم عرض معلومات الإخراج هنا.",
+ "Inference": "استدلال",
+ "Train": "قطار",
+ "Extra": "اضافيه",
+ "Merge Audios": "دمج الصوتيات",
+ "Processing": "تجهيز",
+ "Audio Analyzer": "محلل الصوت",
+ "Model Information": "معلومات النموذج",
+ "Plugins": "الإضافات",
+ "Download": "تحميل",
+ "Report a Bug": "الإبلاغ عن خطأ",
+ "Settings": "اعدادات",
+ "Preprocess": "المعالجة المسبقة",
+ "Model Name": "اسم الموديل",
+ "Name of the new model.": "اسم النموذج الجديد.",
+ "Enter model name": "أدخل اسم الطراز",
+ "Dataset Path": "مسار مجموعة البيانات",
+ "Path to the dataset folder.": "المسار إلى مجلد مجموعة البيانات.",
+ "Refresh Datasets": "تحديث مجموعات البيانات",
+ "Dataset Creator": "منشئ مجموعة البيانات",
+ "Dataset Name": "اسم مجموعة البيانات",
+ "Name of the new dataset.": "اسم مجموعة البيانات الجديدة.",
+ "Enter dataset name": "أدخل اسم مجموعة البيانات",
+ "Upload Audio Dataset": "تحميل مجموعة بيانات صوتية",
+ "The audio file has been successfully added to the dataset. Please click the preprocess button.": "تمت إضافة الملف الصوتي بنجاح إلى مجموعة البيانات. الرجاء النقر فوق زر المعالجة المسبقة.",
+ "Enter dataset path": "إدخال مسار مجموعة البيانات",
+ "Sampling Rate": "معدل أخذ العينات",
+ "The sampling rate of the audio files.": "معدل أخذ العينات من الملفات الصوتية.",
+ "Model Architecture": "نسخة RVC",
+ "Version of the model architecture.": "نسخة RVC من النموذج.",
+ "Preprocess Dataset": "مجموعة بيانات ما قبل المعالجة",
+ "Extract": "استخرج",
+ "Hop Length": "طول القفزة",
+ "Denotes the duration it takes for the system to transition to a significant pitch change. Smaller hop lengths require more time for inference but tend to yield higher pitch accuracy.": "يشير إلى المدة التي يستغرقها النظام للانتقال إلى تغيير كبير في درجة الصوت. تتطلب أطوال القفزات الأصغر مزيدا من الوقت للاستدلال ولكنها تميل إلى تحقيق دقة أعلى في درجة الصوت.",
+ "Batch Size": "حجم الدفعة",
+ "It's advisable to align it with the available VRAM of your GPU. A setting of 4 offers improved accuracy but slower processing, while 8 provides faster and standard results.": "ينصح بمحاذاته مع VRAM المتاح لوحدة معالجة الرسومات الخاصة بك. يوفر الإعداد 4 دقة محسنة ولكن معالجة أبطأ ، بينما يوفر 8 نتائج أسرع وقياسية.",
+ "Save Every Epoch": "حفظ كل حقبة",
+ "Determine at how many epochs the model will saved at.": "حدد عدد الفترات التي سيتم حفظ النموذج فيها.",
+ "Total Epoch": "إجمالي العصر",
+ "Specifies the overall quantity of epochs for the model training process.": "يحدد الكمية الإجمالية للعهود لعملية التدريب النموذجية.",
+ "Pretrained": "التدريب المسبق",
+ "Save Only Latest": "حفظ الأحدث فقط",
+ "Enabling this setting will result in the G and D files saving only their most recent versions, effectively conserving storage space.": "سيؤدي تمكين هذا الإعداد إلى حفظ ملفات G و D لأحدث إصداراتها فقط ، مما يوفر مساحة التخزين بشكل فعال.",
+ "Save Every Weights": "حفظ كل الأوزان",
+ "This setting enables you to save the weights of the model at the conclusion of each epoch.": "يمكنك هذا الإعداد من حفظ أوزان النموذج في نهاية كل حقبة.",
+ "Custom Pretrained": "تدريب مسبق مخصص",
+ "Utilizing custom pretrained models can lead to superior results, as selecting the most suitable pretrained models tailored to the specific use case can significantly enhance performance.": "يمكن أن يؤدي استخدام النماذج المخصصة المدربة مسبقا إلى نتائج فائقة ، حيث أن اختيار النماذج الأكثر ملاءمة للاختبار المسبق والمصممة خصيصا لحالة الاستخدام المحددة يمكن أن يعزز الأداء بشكل كبير.",
+ "Upload Pretrained Model": "تحميل نموذج تم تدريبه مسبقا",
+ "Refresh Custom Pretraineds": "تحديث التدريبات المسبقة المخصصة",
+ "Pretrained Custom Settings": "الإعدادات المخصصة المدربة مسبقا",
+ "The file you dropped is not a valid pretrained file. Please try again.": "الملف الذي أسقطته ليس ملفا صالحا تم تدريبه مسبقا. يرجى المحاولة مرة أخرى.",
+ "Click the refresh button to see the pretrained file in the dropdown menu.": "انقر فوق زر التحديث لرؤية الملف الذي تم اختباره مسبقا في القائمة المنسدلة.",
+ "Pretrained G Path": "مخصص مسبقا G",
+ "Pretrained D Path": "مخصص مسبق التدريب D",
+ "GPU Settings": "إعدادات وحدة معالجة الرسومات",
+ "Sets advanced GPU settings, recommended for users with better GPU architecture.": "يضبط إعدادات GPU المتقدمة ، الموصى بها للمستخدمين الذين لديهم بنية GPU أفضل.",
+ "GPU Custom Settings": "الإعدادات المخصصة لوحدة معالجة الرسومات",
+ "GPU Number": "رقم وحدة معالجة الرسومات",
+ "0 to ∞ separated by -": "0 إلى ∞ مفصولة ب -",
+ "GPU Information": "معلومات وحدة معالجة الرسومات",
+ "Pitch Guidance": "توجيه الملعب",
+ "By employing pitch guidance, it becomes feasible to mirror the intonation of the original voice, including its pitch. This feature is particularly valuable for singing and other scenarios where preserving the original melody or pitch pattern is essential.": "من خلال استخدام توجيه درجة الصوت ، يصبح من الممكن عكس نغمة الصوت الأصلي ، بما في ذلك طبقة الصوت. هذه الميزة ذات قيمة خاصة للغناء والسيناريوهات الأخرى حيث يكون الحفاظ على اللحن الأصلي أو نمط طبقة الصوت أمرا ضروريا.",
+ "Utilize pretrained models when training your own. This approach reduces training duration and enhances overall quality.": "استخدم النماذج المدربة مسبقا عند تدريب النماذج الخاصة بك. هذا النهج يقلل من مدة التدريب ويعزز الجودة الشاملة.",
+ "Extract Features": "استخراج الميزات",
+ "Start Training": "ابدأ التدريب",
+ "Generate Index": "إنشاء فهرس",
+ "Voice Model": "نموذج الصوت",
+ "Select the voice model to use for the conversion.": "حدد نموذج الصوت لاستخدامه في التحويل.",
+ "Index File": "ملف الفهرس",
+ "Select the index file to use for the conversion.": "حدد ملف الفهرس لاستخدامه للتحويل.",
+ "Refresh": "تحديث",
+ "Unload Voice": "تفريغ الصوت",
+ "Single": "واحد",
+ "Upload Audio": "تحميل الصوت",
+ "Select Audio": "حدد الصوت",
+ "Select the audio to convert.": "حدد الصوت المراد تحويله.",
+ "Advanced Settings": "الإعدادات المتقدمة",
+ "Clear Outputs (Deletes all audios in assets/audios)": "مخرجات واضحة (يحذف جميع الصوتيات في الأصول / الصوتيات)",
+ "Custom Output Path": "مسار الإخراج المخصص",
+ "Output Path": "مسار الإخراج",
+ "The path where the output audio will be saved, by default in assets/audios/output.wav": "المسار الذي سيتم فيه حفظ الصوت الناتج ، افتراضيا في الأصول / الصوتيات / output.wav",
+ "Split Audio": "تقسيم الصوت",
+ "Split the audio into chunks for inference to obtain better results in some cases.": "قسم الصوت إلى أجزاء للاستدلال للحصول على نتائج أفضل في بعض الحالات.",
+ "Autotune": "الضبط التلقائي",
+ "Apply a soft autotune to your inferences, recommended for singing conversions.": "قم بتطبيق ضبط تلقائي ناعم على استنتاجاتك ، موصى به لغناء التحويلات.",
+ "Clean Audio": "صوت نظيف",
+ "Clean your audio output using noise detection algorithms, recommended for speaking audios.": "قم بتنظيف إخراج الصوت باستخدام خوارزميات اكتشاف الضوضاء ، الموصى بها للتحدث الصوتيات.",
+ "Clean Strength": "قوة نظيفة",
+ "Set the clean-up level to the audio you want, the more you increase it the more it will clean up, but it is possible that the audio will be more compressed.": "اضبط مستوى التنظيف على الصوت الذي تريده ، وكلما قمت بزيادته كلما تم تنظيفه ، ولكن من الممكن أن يكون الصوت أكثر ضغطا.",
+ "Pitch": "زفت",
+ "Set the pitch of the audio, the higher the value, the higher the pitch.": "اضبط درجة الصوت ، وكلما زادت القيمة ، زادت درجة الصوت.",
+ "Filter Radius": "نصف قطر المرشح",
+ "If the number is greater than or equal to three, employing median filtering on the collected tone results has the potential to decrease respiration.": "إذا كان العدد أكبر من أو يساوي ثلاثة ، فإن استخدام الترشيح المتوسط على نتائج النغمة التي تم جمعها لديه القدرة على تقليل التنفس.",
+ "Search Feature Ratio": "نسبة ميزة البحث",
+ "Influence exerted by the index file; a higher value corresponds to greater influence. However, opting for lower values can help mitigate artifacts present in the audio.": "التأثير الذي يمارسه ملف الفهرس ؛ قيمة أعلى يتوافق مع تأثير أكبر. ومع ذلك ، يمكن أن يساعد اختيار القيم الأقل في التخفيف من العناصر الموجودة في الصوت.",
+ "Volume Envelope": "مغلف الحجم",
+ "Substitute or blend with the volume envelope of the output. The closer the ratio is to 1, the more the output envelope is employed.": "استبدل أو امزج مع مغلف حجم المخرجات. كلما اقتربت النسبة من 1 ، زاد استخدام مغلف الإخراج.",
+ "Protect Voiceless Consonants": "حماية الحروف الساكنة التي لا صوت لها",
+ "Safeguard distinct consonants and breathing sounds to prevent electro-acoustic tearing and other artifacts. Pulling the parameter to its maximum value of 0.5 offers comprehensive protection. However, reducing this value might decrease the extent of protection while potentially mitigating the indexing effect.": "حماية الحروف الساكنة المميزة وأصوات التنفس لمنع التمزق الصوتي الكهربائي والتحف الأخرى. يوفر سحب المعلمة إلى قيمتها القصوى البالغة 0.5 حماية شاملة. ومع ذلك ، قد يؤدي تقليل هذه القيمة إلى تقليل مدى الحماية مع احتمال التخفيف من تأثير الفهرسة.",
+ "Pitch extraction algorithm": "خوارزمية استخراج الملعب",
+ "Pitch extraction algorithm to use for the audio conversion. The default algorithm is rmvpe, which is recommended for most cases.": "خوارزمية استخراج الملعب لاستخدامها في تحويل الصوت. الخوارزمية الافتراضية هي rmvpe ، والتي يوصى بها في معظم الحالات.",
+ "Convert": "حول",
+ "Export Audio": "تصدير الصوت",
+ "Batch": "الدفعه",
+ "Input Folder": "مجلد الإدخال",
+ "Select the folder containing the audios to convert.": "حدد المجلد الذي يحتوي على الصوتيات المراد تحويلها.",
+ "Enter input path": "أدخل مسار الإدخال",
+ "Output Folder": "مجلد الإخراج",
+ "Select the folder where the output audios will be saved.": "حدد المجلد حيث سيتم حفظ صوتيات الإخراج.",
+ "Enter output path": "أدخل مسار الإخراج",
+ "Get information about the audio": "الحصول على معلومات حول الصوت",
+ "Information about the audio file": "معلومات حول الملف الصوتي",
+ "Waiting for information...": "في انتظار المعلومات...",
+ "## Voice Blender": "## خلاط الصوت",
+ "Select two voice models, set your desired blend percentage, and blend them into an entirely new voice.": "حدد نموذجين صوتيين ، وقم بتعيين نسبة المزج التي تريدها ، وامزجهما في صوت جديد تماما.",
+ "Voice Blender": "خلاط الصوت",
+ "Drag and drop your model here": "قم بسحب وإسقاط النموذج الخاص بك هنا",
+ "You can also use a custom path.": "يمكنك أيضا استخدام مسار مخصص.",
+ "Blend Ratio": "نسبة المزج",
+ "Adjusting the position more towards one side or the other will make the model more similar to the first or second.": "سيؤدي ضبط الموضع أكثر نحو جانب أو آخر إلى جعل النموذج أكثر تشابها مع الأول أو الثاني.",
+ "Fusion": "اندماج",
+ "Path to Model": "الطريق إلى النموذج",
+ "Enter path to model": "أدخل المسار إلى النموذج",
+ "Model information to be placed": "معلومات النموذج المراد وضعها",
+ "Inroduce the model information": "Inroduce معلومات النموذج",
+ "The information to be placed in the model (You can leave it blank or put anything).": "المعلومات المراد وضعها في النموذج (يمكنك تركها فارغة أو وضع أي شيء).",
+ "View model information": "عرض معلومات النموذج",
+ "Introduce the model pth path": "تقديم نموذج مسار pth",
+ "View": "منظر",
+ "Model extraction": "استخراج النموذج",
+ "Model conversion": "تحويل النموذج",
+ "Pth file": "ملف Pth",
+ "Output of the pth file": "إخراج ملف pth",
+ "# How to Report an Issue on GitHub": "# كيفية الإبلاغ عن مشكلة على GitHub",
+ "1. Click on the 'Record Screen' button below to start recording the issue you are experiencing.": "1. انقر فوق الزر \"شاشة التسجيل\" أدناه لبدء تسجيل المشكلة التي تواجهها.",
+ "2. Once you have finished recording the issue, click on the 'Stop Recording' button (the same button, but the label changes depending on whether you are actively recording or not).": "2. بمجرد الانتهاء من تسجيل المشكلة ، انقر فوق الزر \"إيقاف التسجيل\" (نفس الزر ، لكن التسمية تتغير اعتمادا على ما إذا كنت تقوم بالتسجيل بنشاط أم لا).",
+ "3. Go to [GitHub Issues](https://github.com/IAHispano/Applio/issues) and click on the 'New Issue' button.": "3. انتقل إلى [مشكلات GitHub] (https://github.com/IAHispano/Applio/issues) وانقر على زر \"إصدار جديد\".",
+ "4. Complete the provided issue template, ensuring to include details as needed, and utilize the assets section to upload the recorded file from the previous step.": "4. أكمل نموذج المشكلة المقدم ، مع التأكد من تضمين التفاصيل حسب الحاجة ، واستخدم قسم الأصول لتحميل الملف المسجل من الخطوة السابقة.",
+ "Record Screen": "شاشة التسجيل",
+ "Record": "سجل",
+ "Stop Recording": "إيقاف التسجيل",
+ "Introduce the model .pth path": "تقديم نموذج مسار .pth",
+ "See Model Information": "انظر معلومات النموذج",
+ "## Download Model": "## تحميل الموديل",
+ "Model Link": "رابط النموذج",
+ "Introduce the model link": "تقديم رابط النموذج",
+ "Download Model": "تحميل الموديل",
+ "## Drop files": "## إسقاط الملفات",
+ "Drag your .pth file and .index file into this space. Drag one and then the other.": "اسحب ملف .pth وملف .index إلى هذه المساحة. اسحب أحدهما ثم الآخر.",
+ "TTS Voices": "أصوات تحويل النص إلى كلام",
+ "Select the TTS voice to use for the conversion.": "حدد صوت TTS لاستخدامه في التحويل.",
+ "Text to Synthesize": "النص المراد توليفه",
+ "Enter the text to synthesize.": "أدخل النص المراد توليفه.",
+ "Or you can upload a .txt file": "أو يمكنك تحميل ملف .txt",
+ "Enter text to synthesize": "أدخل نصا لتوليفه",
+ "Output Path for TTS Audio": "مسار الإخراج لصوت TTS",
+ "Output Path for RVC Audio": "مسار الإخراج لصوت RVC",
+ "Enable Applio integration with Discord presence": "تمكين تكامل Applio مع وجود Discord",
+ "It will activate the possibility of displaying the current Applio activity in Discord.": "سيتم تنشيط إمكانية عرض نشاط Applio الحالي في Discord.",
+ "Enable Applio integration with applio.org/models using flask": "تمكين تكامل Applio مع applio.org/models باستخدام القارورة",
+ "It will activate the possibility of downloading models with a click from the website.": "سيتم تنشيط إمكانية تنزيل النماذج بنقرة واحدة من الموقع.",
+ "Theme": "موضوع",
+ "Select the theme you want to use. (Requires restarting Applio)": "حدد السمة التي تريد استخدامها. (يتطلب إعادة تشغيل Applio)",
+ "Language": "اللغة",
+ "Select the language you want to use. (Requires restarting Applio)": "حدد اللغة التي تريد استخدامها. (يتطلب إعادة تشغيل Applio)",
+ "Plugin Installer": "مثبت البرنامج المساعد",
+ "Drag your plugin.zip to install it": "اسحب plugin.zip لتثبيته",
+ "Version Checker": "مدقق الإصدار",
+ "Check which version of Applio is the latest to see if you need to update.": "تحقق من إصدار Applio هو الأحدث لمعرفة ما إذا كنت بحاجة إلى التحديث.",
+ "Check for updates": "التحقق من وجود تحديثات"
+}
\ No newline at end of file
diff --git a/assets/i18n/languages/bn_BN.json b/assets/i18n/languages/bn_BN.json
new file mode 100644
index 0000000000000000000000000000000000000000..22cb83aa3a677834dc2b05d7280df1759d1a3aed
--- /dev/null
+++ b/assets/i18n/languages/bn_BN.json
@@ -0,0 +1,175 @@
+{
+ "VITS-based Voice Conversion focused on simplicity, quality and performance.": "আলটিমেট ভয়েস ক্লোনিং টুল, অতুলনীয় শক্তি, মডুলারিটি এবং ব্যবহারকারী-বান্ধব অভিজ্ঞতার জন্য নিখুঁতভাবে অপ্টিমাইজ করা।",
+ "This section contains some extra utilities that often may be in experimental phases.": "এই বিভাগে কিছু অতিরিক্ত ইউটিলিটি রয়েছে যা প্রায়শই পরীক্ষামূলক পর্যায়ে থাকতে পারে।",
+ "Output Information": "আউটপুট তথ্য",
+ "The output information will be displayed here.": "আউটপুট তথ্য এখানে প্রদর্শিত হবে।",
+ "Inference": "অনুমান",
+ "Train": "ট্রেন",
+ "Extra": "অতিরিক্ত",
+ "Merge Audios": "অডিওগুলি মার্জ করুন",
+ "Processing": "প্রক্রিয়াকরণ",
+ "Audio Analyzer": "অডিও বিশ্লেষক",
+ "Model Information": "মডেল তথ্য",
+ "Plugins": "প্লাগইন",
+ "Download": "ডাউনলোড",
+ "Report a Bug": "একটি বাগ রিপোর্ট করুন",
+ "Settings": "সেটিংস",
+ "Preprocess": "প্রিপ্রসেস",
+ "Model Name": "মডেলের নাম",
+ "Name of the new model.": "নতুন মডেলের নাম",
+ "Enter model name": "মডেলের নাম লিখুন",
+ "Dataset Path": "ডেটাসেট পাথ",
+ "Path to the dataset folder.": "ডেটাসেট ফোল্ডারে পাথ।",
+ "Refresh Datasets": "ডেটাসেট রিফ্রেশ করুন",
+ "Dataset Creator": "ডেটাসেট স্রষ্টা",
+ "Dataset Name": "ডেটাসেটের নাম",
+ "Name of the new dataset.": "নতুন ডেটাসেটের নাম",
+ "Enter dataset name": "তথ্যসেটের নাম লিখুন",
+ "Upload Audio Dataset": "অডিও ডেটাসেট আপলোড করুন",
+ "The audio file has been successfully added to the dataset. Please click the preprocess button.": "অডিও ফাইলটি সফলভাবে ডেটাসেটে যুক্ত করা হয়েছে। অনুগ্রহ করে প্রিপ্রসেস বাটনে ক্লিক করুন।",
+ "Enter dataset path": "ডেটাসেটের পথ লিখুন",
+ "Sampling Rate": "নমুনা হার",
+ "The sampling rate of the audio files.": "অডিও ফাইলের নমুনা হার।",
+ "Model Architecture": "আরভিসি সংস্করণ",
+ "Version of the model architecture.": "মডেলটির আরভিসি সংস্করণ।",
+ "Preprocess Dataset": "প্রিপ্রসেস ডেটাসেট",
+ "Extract": "নিষ্কাশন",
+ "Hop Length": "হপ দৈর্ঘ্য",
+ "Denotes the duration it takes for the system to transition to a significant pitch change. Smaller hop lengths require more time for inference but tend to yield higher pitch accuracy.": "সিস্টেমটি একটি উল্লেখযোগ্য পিচ পরিবর্তনে রূপান্তরিত হতে যে সময়কাল নেয় তা বোঝায়। ছোট হপ দৈর্ঘ্যের জন্য অনুমানের জন্য আরও সময় প্রয়োজন তবে উচ্চতর পিচ নির্ভুলতা অর্জন করে।",
+ "Batch Size": "ব্যাচের আকার",
+ "It's advisable to align it with the available VRAM of your GPU. A setting of 4 offers improved accuracy but slower processing, while 8 provides faster and standard results.": "এটি আপনার জিপিইউর উপলব্ধ ভিআরএএমের সাথে সারিবদ্ধ করার পরামর্শ দেওয়া হচ্ছে। 4 এর একটি সেটিং উন্নত নির্ভুলতা সরবরাহ করে তবে ধীর প্রক্রিয়াজাতকরণ, যখন 8 দ্রুত এবং মানক ফলাফল সরবরাহ করে।",
+ "Save Every Epoch": "প্রতিটি যুগ সংরক্ষণ করুন",
+ "Determine at how many epochs the model will saved at.": "মডেলটি কতগুলি যুগে সংরক্ষণ করবে তা নির্ধারণ করুন।",
+ "Total Epoch": "মোট যুগ",
+ "Specifies the overall quantity of epochs for the model training process.": "মডেল প্রশিক্ষণ প্রক্রিয়ার জন্য যুগের সামগ্রিক পরিমাণ উল্লেখ করে।",
+ "Pretrained": "পূর্বনির্ধারিত",
+ "Save Only Latest": "শুধুমাত্র সর্বশেষ সংরক্ষণ করুন",
+ "Enabling this setting will result in the G and D files saving only their most recent versions, effectively conserving storage space.": "এই সেটিংটি সক্ষম করার ফলে জি এবং ডি ফাইলগুলি কেবলমাত্র তাদের সাম্প্রতিকতম সংস্করণগুলি সংরক্ষণ করবে, কার্যকরভাবে স্টোরেজ স্পেস সংরক্ষণ করবে।",
+ "Save Every Weights": "প্রতিটি ওজন সংরক্ষণ করুন",
+ "This setting enables you to save the weights of the model at the conclusion of each epoch.": "এই সেটিংটি আপনাকে প্রতিটি যুগের শেষে মডেলের ওজন সংরক্ষণ করতে সক্ষম করে।",
+ "Custom Pretrained": "কাস্টম প্রিট্রেইনড",
+ "Utilizing custom pretrained models can lead to superior results, as selecting the most suitable pretrained models tailored to the specific use case can significantly enhance performance.": "কাস্টম প্রিট্রেনড মডেলগুলি ব্যবহার করা উচ্চতর ফলাফলের দিকে পরিচালিত করতে পারে, কারণ নির্দিষ্ট ব্যবহারের ক্ষেত্রে উপযুক্ত প্রিট্রেনড মডেলগুলি নির্বাচন করা কর্মক্ষমতা উল্লেখযোগ্যভাবে বাড়িয়ে তুলতে পারে।",
+ "Upload Pretrained Model": "প্রিট্রেনড মডেল আপলোড করুন",
+ "Refresh Custom Pretraineds": "কাস্টম প্রিট্রেনেডগুলি রিফ্রেশ করুন",
+ "Pretrained Custom Settings": "পূর্বনির্ধারিত কাস্টম সেটিংস",
+ "The file you dropped is not a valid pretrained file. Please try again.": "আপনার ফেলে দেওয়া ফাইলটি একটি বৈধ পূর্বপ্রশিক্ষিত ফাইল নয়. অনুগ্রহ করে আবার চেষ্টা করুন।",
+ "Click the refresh button to see the pretrained file in the dropdown menu.": "ড্রপডাউন মেনুতে প্রিট্রেনড ফাইলটি দেখতে রিফ্রেশ বোতামটি ক্লিক করুন।",
+ "Pretrained G Path": "কাস্টম প্রিট্রেনড জি",
+ "Pretrained D Path": "কাস্টম প্রিট্রেনড ডি",
+ "GPU Settings": "জিপিইউ সেটিংস",
+ "Sets advanced GPU settings, recommended for users with better GPU architecture.": "উন্নত GPU সেটিংস সেট করে, আরও ভাল GPU আর্কিটেকচার সহ ব্যবহারকারীদের জন্য প্রস্তাবিত।",
+ "GPU Custom Settings": "GPU কাস্টম সেটিংস",
+ "GPU Number": "জিপিইউ নম্বর",
+ "0 to ∞ separated by -": "0 থেকে ∞ দ্বারা পৃথক করা হয় -",
+ "GPU Information": "জিপিইউ তথ্য",
+ "Pitch Guidance": "পিচ গাইডেন্স",
+ "By employing pitch guidance, it becomes feasible to mirror the intonation of the original voice, including its pitch. This feature is particularly valuable for singing and other scenarios where preserving the original melody or pitch pattern is essential.": "পিচ গাইডেন্স নিয়োগ করে, এর পিচ সহ মূল ভয়েসের স্বরভঙ্গিটি মিরর করা সম্ভব হয়। এই বৈশিষ্ট্যটি গাওয়া এবং অন্যান্য পরিস্থিতিতে বিশেষত মূল্যবান যেখানে মূল সুর বা পিচ প্যাটার্ন সংরক্ষণ করা অপরিহার্য।",
+ "Utilize pretrained models when training your own. This approach reduces training duration and enhances overall quality.": "আপনার নিজের প্রশিক্ষণের সময় প্রিপ্রশিক্ষিত মডেলগুলি ব্যবহার করুন। এই পদ্ধতির প্রশিক্ষণের সময়কাল হ্রাস করে এবং সামগ্রিক মান বাড়ায়।",
+ "Extract Features": "এক্সট্রাক্ট বৈশিষ্ট্য",
+ "Start Training": "প্রশিক্ষণ শুরু করুন",
+ "Generate Index": "সূচী তৈরি করুন",
+ "Voice Model": "ভয়েস মডেল",
+ "Select the voice model to use for the conversion.": "রূপান্তরটির জন্য ব্যবহার করতে ভয়েস মডেলটি নির্বাচন করুন।",
+ "Index File": "সূচী ফাইল",
+ "Select the index file to use for the conversion.": "রূপান্তরটির জন্য ব্যবহার করতে সূচী ফাইলটি নির্বাচন করুন।",
+ "Refresh": "সতেজ",
+ "Unload Voice": "ভয়েস আনলোড করুন",
+ "Single": "একক",
+ "Upload Audio": "অডিও আপলোড করুন",
+ "Select Audio": "অডিও নির্বাচন করুন",
+ "Select the audio to convert.": "রূপান্তর করতে অডিও নির্বাচন করুন।",
+ "Advanced Settings": "উন্নত সেটিংস",
+ "Clear Outputs (Deletes all audios in assets/audios)": "আউটপুট সাফ করুন (সম্পদ / অডিওতে সমস্ত অডিও মুছে ফেলে)",
+ "Custom Output Path": "কাস্টম আউটপুট পাথ",
+ "Output Path": "আউটপুট পাথ",
+ "The path where the output audio will be saved, by default in assets/audios/output.wav": "পাথ যেখানে আউটপুট অডিও সংরক্ষণ করা হবে, সম্পদ / অডিও / output.wav ডিফল্টরূপে",
+ "Split Audio": "অডিও বিভক্ত করুন",
+ "Split the audio into chunks for inference to obtain better results in some cases.": "কিছু ক্ষেত্রে আরও ভাল ফলাফল পেতে অনুমানের জন্য অডিওটিকে খণ্ডগুলিতে বিভক্ত করুন।",
+ "Autotune": "অটোটিউন",
+ "Apply a soft autotune to your inferences, recommended for singing conversions.": "আপনার অনুমানগুলিতে একটি নরম অটোটিউন প্রয়োগ করুন, রূপান্তরগুলি গাওয়ার জন্য প্রস্তাবিত।",
+ "Clean Audio": "পরিষ্কার অডিও",
+ "Clean your audio output using noise detection algorithms, recommended for speaking audios.": "অডিও বলার জন্য প্রস্তাবিত কোলাহল শনাক্তকরণ অ্যালগরিদমগুলি ব্যবহার করে আপনার অডিও আউটপুট পরিষ্কার করুন।",
+ "Clean Strength": "পরিষ্কার শক্তি",
+ "Set the clean-up level to the audio you want, the more you increase it the more it will clean up, but it is possible that the audio will be more compressed.": "আপনি যে অডিওটি চান তাতে ক্লিন-আপ স্তরটি সেট করুন, আপনি এটি যত বাড়াবেন তত বেশি এটি পরিষ্কার হবে, তবে এটি সম্ভব যে অডিওটি আরও সংকুচিত হবে।",
+ "Pitch": "পিচ",
+ "Set the pitch of the audio, the higher the value, the higher the pitch.": "অডিওর পিচ সেট করুন, মান যত বেশি, পিচ তত বেশি।",
+ "Filter Radius": "ফিল্টার ব্যাসার্ধ",
+ "If the number is greater than or equal to three, employing median filtering on the collected tone results has the potential to decrease respiration.": "যদি সংখ্যাটি তিনটির চেয়ে বেশি বা সমান হয় তবে সংগৃহীত স্বন ফলাফলগুলিতে মধ্যমা ফিল্টারিং নিয়োগ করা শ্বাসকষ্ট হ্রাস করার সম্ভাবনা রয়েছে।",
+ "Search Feature Ratio": "অনুসন্ধান বৈশিষ্ট্য অনুপাত",
+ "Influence exerted by the index file; a higher value corresponds to greater influence. However, opting for lower values can help mitigate artifacts present in the audio.": "ইনডেক্স ফাইল দ্বারা প্রভাবিত; একটি উচ্চতর মান বৃহত্তর প্রভাবের সাথে মিলে যায়। তবে, নিম্ন মানগুলি বেছে নেওয়া অডিওতে উপস্থিত নিদর্শনগুলি প্রশমিত করতে সহায়তা করতে পারে।",
+ "Volume Envelope": "ভলিউম খাম",
+ "Substitute or blend with the volume envelope of the output. The closer the ratio is to 1, the more the output envelope is employed.": "আউটপুটের ভলিউম খামের সাথে বিকল্প বা মিশ্রণ করুন। অনুপাতটি 1 এর কাছাকাছি হয়, তত বেশি আউটপুট খাম নিযুক্ত করা হয়।",
+ "Protect Voiceless Consonants": "কণ্ঠহীন ব্যঞ্জনবর্ণ রক্ষা করুন",
+ "Safeguard distinct consonants and breathing sounds to prevent electro-acoustic tearing and other artifacts. Pulling the parameter to its maximum value of 0.5 offers comprehensive protection. However, reducing this value might decrease the extent of protection while potentially mitigating the indexing effect.": "ইলেক্ট্রো-অ্যাকোস্টিক ছিঁড়ে যাওয়া এবং অন্যান্য নিদর্শনগুলি রোধ করতে স্বতন্ত্র ব্যঞ্জনবর্ণ এবং শ্বাস প্রশ্বাসের শব্দগুলি রক্ষা করুন। প্যারামিটারটিকে তার সর্বোচ্চ মান 0.5 এ টানলে ব্যাপক সুরক্ষা সরবরাহ করে। যাইহোক, এই মান হ্রাস করা সম্ভাব্যভাবে সূচক প্রভাব প্রশমিত করার সময় সুরক্ষার পরিমাণ হ্রাস করতে পারে।",
+ "Pitch extraction algorithm": "পিচ নিষ্কাশন অ্যালগরিদম",
+ "Pitch extraction algorithm to use for the audio conversion. The default algorithm is rmvpe, which is recommended for most cases.": "অডিও রূপান্তর জন্য ব্যবহার করতে পিচ নিষ্কাশন অ্যালগরিদম. ডিফল্ট অ্যালগরিদমটি আরএমভিপিই, যা বেশিরভাগ ক্ষেত্রে প্রস্তাবিত।",
+ "Convert": "রূপান্তর",
+ "Export Audio": "অডিও রপ্তানি করুন",
+ "Batch": "ব্যাচ",
+ "Input Folder": "ইনপুট ফোল্ডার",
+ "Select the folder containing the audios to convert.": "রূপান্তর করতে অডিওযুক্ত ফোল্ডারটি নির্বাচন করুন।",
+ "Enter input path": "ইনপুট পথ লিখুন",
+ "Output Folder": "আউটপুট ফোল্ডার",
+ "Select the folder where the output audios will be saved.": "ফোল্ডারটি নির্বাচন করুন যেখানে আউটপুট অডিওগুলি সংরক্ষণ করা হবে।",
+ "Enter output path": "আউটপুট পথ লিখুন",
+ "Get information about the audio": "অডিও সম্পর্কে তথ্য পান",
+ "Information about the audio file": "অডিও ফাইল সম্পর্কে তথ্য",
+ "Waiting for information...": "তথ্যের অপেক্ষায়...",
+ "## Voice Blender": "## ভয়েস ব্লেন্ডার",
+ "Select two voice models, set your desired blend percentage, and blend them into an entirely new voice.": "দুটি ভয়েস মডেল নির্বাচন করুন, আপনার পছন্দসই মিশ্রণের শতাংশ সেট করুন এবং এগুলি সম্পূর্ণ নতুন ভয়েসে মিশ্রিত করুন।",
+ "Voice Blender": "ভয়েস ব্লেন্ডার",
+ "Drag and drop your model here": "আপনার মডেলটি এখানে টেনে এনে ছেড়ে দিন",
+ "You can also use a custom path.": "আপনি একটি কাস্টম পাথও ব্যবহার করতে পারেন।",
+ "Blend Ratio": "ব্লেন্ড রেশিও",
+ "Adjusting the position more towards one side or the other will make the model more similar to the first or second.": "একপাশে বা অন্যদিকে অবস্থানটি আরও সামঞ্জস্য করা মডেলটিকে প্রথম বা দ্বিতীয়টির সাথে আরও অনুরূপ করে তুলবে।",
+ "Fusion": "ফিউশন",
+ "Path to Model": "মডেলের পথ",
+ "Enter path to model": "মডেলের পথ লিখুন",
+ "Model information to be placed": "মডেল তথ্য স্থাপন করা হবে",
+ "Inroduce the model information": "মডেলের তথ্য ইনরোডিউস করুন",
+ "The information to be placed in the model (You can leave it blank or put anything).": "মডেলটিতে যে তথ্য রাখতে হবে (আপনি এটি ফাঁকা রেখে দিতে পারেন বা কিছু রাখতে পারেন)।",
+ "View model information": "মডেল তথ্য দেখুন",
+ "Introduce the model pth path": "মডেল পিটিএইচ পাথ পরিচয় করিয়ে দিন",
+ "View": "দর্শন",
+ "Model extraction": "মডেল নিষ্কাশন",
+ "Model conversion": "মডেল রূপান্তর",
+ "Pth file": "Pth ফাইল",
+ "Output of the pth file": "পিটিএইচ ফাইলের আউটপুট",
+ "# How to Report an Issue on GitHub": "# গিটহাবে একটি সমস্যা কিভাবে রিপোর্ট করবেন",
+ "1. Click on the 'Record Screen' button below to start recording the issue you are experiencing.": "1. আপনি যে সমস্যার সম্মুখীন হচ্ছেন তা রেকর্ড করা শুরু করতে নীচের 'রেকর্ড স্ক্রিন' বোতামে ক্লিক করুন।",
+ "2. Once you have finished recording the issue, click on the 'Stop Recording' button (the same button, but the label changes depending on whether you are actively recording or not).": "2. একবার আপনি সমস্যাটি রেকর্ড করা শেষ করার পরে, 'রেকর্ডিং বন্ধ করুন' বোতামে ক্লিক করুন (একই বোতাম, তবে আপনি সক্রিয়ভাবে রেকর্ড করছেন কিনা তার উপর নির্ভর করে লেবেলটি পরিবর্তিত হয়)।",
+ "3. Go to [GitHub Issues](https://github.com/IAHispano/Applio/issues) and click on the 'New Issue' button.": "৩. [GitHub Issues](https://github.com/IAHispano/Applio/issues) এ যান এবং 'New Issue' বাটনে ক্লিক করুন।",
+ "4. Complete the provided issue template, ensuring to include details as needed, and utilize the assets section to upload the recorded file from the previous step.": "4. প্রদত্ত ইস্যু টেমপ্লেটটি সম্পূর্ণ করুন, প্রয়োজন অনুসারে বিশদ অন্তর্ভুক্ত করা নিশ্চিত করুন এবং পূর্ববর্তী পদক্ষেপ থেকে রেকর্ড করা ফাইলটি আপলোড করতে সম্পদ বিভাগটি ব্যবহার করুন।",
+ "Record Screen": "রেকর্ড স্ক্রিন",
+ "Record": "রেকর্ড",
+ "Stop Recording": "রেকর্ডিং বন্ধ করুন",
+ "Introduce the model .pth path": "মডেল .pth পাথ পরিচয় করিয়ে দিন",
+ "See Model Information": "মডেল তথ্য দেখুন",
+ "## Download Model": "## মডেল ডাউনলোড করুন",
+ "Model Link": "মডেল লিংক",
+ "Introduce the model link": "মডেল লিঙ্কটি পরিচয় করিয়ে দিন",
+ "Download Model": "মডেল ডাউনলোড করুন",
+ "## Drop files": "## ফাইল ড্রপ করুন",
+ "Drag your .pth file and .index file into this space. Drag one and then the other.": "আপনার .pth ফাইল এবং .index ফাইলটি এই স্পেসে টেনে আনুন। একটা টেনে আনুন, তারপর অন্যটা।",
+ "TTS Voices": "টিটিএস ভয়েসেস",
+ "Select the TTS voice to use for the conversion.": "রূপান্তরটির জন্য ব্যবহার করতে TTS ভয়েস নির্বাচন করুন।",
+ "Text to Synthesize": "সংশ্লেষণ করার জন্য পাঠ্য",
+ "Enter the text to synthesize.": "সংশ্লেষ করতে পাঠ্যটি প্রবেশ করান।",
+ "Or you can upload a .txt file": "অথবা আপনি একটি .txt ফাইল আপলোড করতে পারেন",
+ "Enter text to synthesize": "সংশ্লেষ করতে পাঠ্য লিখুন",
+ "Output Path for TTS Audio": "TTS অডিওর জন্য আউটপুট পাথ",
+ "Output Path for RVC Audio": "আরভিসি অডিওর জন্য আউটপুট পাথ",
+ "Enable Applio integration with Discord presence": "ডিসকর্ড উপস্থিতি সহ অ্যাপলিও ইন্টিগ্রেশন সক্ষম করুন",
+ "It will activate the possibility of displaying the current Applio activity in Discord.": "এটি ডিসকর্ডে বর্তমান অ্যাপলিও ক্রিয়াকলাপ প্রদর্শনের সম্ভাবনা সক্রিয় করবে।",
+ "Enable Applio integration with applio.org/models using flask": "ফ্লাস্ক ব্যবহার করে applio.org/models সাথে অ্যাপলিও ইন্টিগ্রেশন সক্ষম করুন",
+ "It will activate the possibility of downloading models with a click from the website.": "এটি ওয়েবসাইট থেকে একটি ক্লিকের সাথে মডেলগুলি ডাউনলোড করার সম্ভাবনা সক্রিয় করবে।",
+ "Theme": "থীম",
+ "Select the theme you want to use. (Requires restarting Applio)": "আপনি যে থিমটি ব্যবহার করতে চান তা নির্বাচন করুন। (অ্যাপলিও পুনরায় চালু করা প্রয়োজন)",
+ "Language": "ভাষা",
+ "Select the language you want to use. (Requires restarting Applio)": "আপনি যে ভাষাটি ব্যবহার করতে চান তা নির্বাচন করুন। (অ্যাপলিও পুনরায় চালু করা প্রয়োজন)",
+ "Plugin Installer": "প্লাগইন ইনস্টলার",
+ "Drag your plugin.zip to install it": "এটি ইনস্টল করতে আপনার plugin.zip টেনে আনুন",
+ "Version Checker": "সংস্করণ পরীক্ষক",
+ "Check which version of Applio is the latest to see if you need to update.": "আপনার আপডেট করার প্রয়োজন আছে কিনা তা দেখতে অ্যাপলিওর কোন সংস্করণটি সর্বশেষতম তা পরীক্ষা করে দেখুন।",
+ "Check for updates": "আপডেটের জন্য পরীক্ষা করুন"
+}
\ No newline at end of file
diff --git a/assets/i18n/languages/de_DE.json b/assets/i18n/languages/de_DE.json
new file mode 100644
index 0000000000000000000000000000000000000000..74553d4f9bce89cebf67aef5246bda1b2c7a7de1
--- /dev/null
+++ b/assets/i18n/languages/de_DE.json
@@ -0,0 +1,175 @@
+{
+ "VITS-based Voice Conversion focused on simplicity, quality and performance.": "Ultimatives Tool zum Klonen von Stimmen, das sorgfältig für unübertroffene Leistung, Modularität und Benutzerfreundlichkeit optimiert wurde.",
+ "This section contains some extra utilities that often may be in experimental phases.": "Dieser Abschnitt enthält einige zusätzliche Dienstprogramme, die sich häufig in experimentellen Phasen befinden.",
+ "Output Information": "Informationen zur Ausgabe",
+ "The output information will be displayed here.": "Hier werden die Ausgabeinformationen angezeigt.",
+ "Inference": "Inferenz",
+ "Train": "Trainieren",
+ "Extra": "Extra",
+ "Merge Audios": "Audios zusammenführen",
+ "Processing": "Verarbeitung",
+ "Audio Analyzer": "Audio-Analysator",
+ "Model Information": "Modell-Informationen",
+ "Plugins": "Plugins",
+ "Download": "Herunterladen",
+ "Report a Bug": "Einen Fehler melden",
+ "Settings": "Einstellungen",
+ "Preprocess": "Vorverarbeiten",
+ "Model Name": "Modellname",
+ "Name of the new model.": "Name des neuen Modells.",
+ "Enter model name": "Modellnamen eingeben",
+ "Dataset Path": "Datensatz-Pfad",
+ "Path to the dataset folder.": "Pfad zum Datensatz-Ordner.",
+ "Refresh Datasets": "Aktualisiere den Datensatz",
+ "Dataset Creator": "Ersteller des Datensatzes",
+ "Dataset Name": "Name des Datensatzes",
+ "Name of the new dataset.": "Name des neuen Datensatzes.",
+ "Enter dataset name": "Geben Sie den Namen des Datensatzes ein",
+ "Upload Audio Dataset": "Audio-Datensatz hochladen",
+ "The audio file has been successfully added to the dataset. Please click the preprocess button.": "Die Audiodatei wurde erfolgreich zum Datensatz hinzugefügt. Bitte klicken Sie auf die Schaltfläche \"Vorverarbeiten\".",
+ "Enter dataset path": "Datensatz-Pfad eingeben",
+ "Sampling Rate": "Samplingrate",
+ "The sampling rate of the audio files.": "Die Samplingrate der Audiodateien.",
+ "Model Architecture": "RVC-Version",
+ "Version of the model architecture.": "Die RVC-Version des Modells.",
+ "Preprocess Dataset": "Datensatz vorverarbeiten",
+ "Extract": "Extrahieren",
+ "Hop Length": "Sprungweite",
+ "Denotes the duration it takes for the system to transition to a significant pitch change. Smaller hop lengths require more time for inference but tend to yield higher pitch accuracy.": "Gibt die Dauer an, die das System benötigt, um zu einer signifikanten Tonhöhenänderung überzugehen. Kleinere Sprunglängen benötigen mehr Zeit für die Inferenz, führen aber tendenziell zu einer höheren Tonhöhengenauigkeit.",
+ "Batch Size": "Losgröße",
+ "It's advisable to align it with the available VRAM of your GPU. A setting of 4 offers improved accuracy but slower processing, while 8 provides faster and standard results.": "Es ist ratsam, es mit dem verfügbaren VRAM Ihrer GPU in Einklang zu bringen. Eine Einstellung von 4 bietet eine verbesserte Genauigkeit, aber eine langsamere Verarbeitung, während 8 schnellere und standardmäßige Ergebnisse liefert.",
+ "Save Every Epoch": "Speichere nach jeder Epoche",
+ "Determine at how many epochs the model will saved at.": "Legen Sie fest, bei wie vielen Epochen das Modell gespeichert wird.",
+ "Total Epoch": "Epoche insgesamt",
+ "Specifies the overall quantity of epochs for the model training process.": "Gibt die Gesamtanzahl der Epochen für den Modelltrainingsprozess an.",
+ "Pretrained": "Vortrainiert",
+ "Save Only Latest": "Nur das Neueste speichern",
+ "Enabling this setting will result in the G and D files saving only their most recent versions, effectively conserving storage space.": "Wenn Sie diese Einstellung aktivieren, werden in den G- und D-Dateien nur die neuesten Versionen gespeichert, wodurch Speicherplatz gespart wird.",
+ "Save Every Weights": "Speichern Sie alle Gewichtungen",
+ "This setting enables you to save the weights of the model at the conclusion of each epoch.": "Mit dieser Einstellung können Sie die Gewichtungen des Modells am Ende jeder Epoche speichern.",
+ "Custom Pretrained": "Benutzerdefiniert vortrainiert",
+ "Utilizing custom pretrained models can lead to superior results, as selecting the most suitable pretrained models tailored to the specific use case can significantly enhance performance.": "Die Verwendung benutzerdefinierter vortrainierter Modelle kann zu besseren Ergebnissen führen, da die Auswahl der am besten geeigneten vortrainierten Modelle, die auf den jeweiligen Anwendungsfall zugeschnitten sind, die Leistung erheblich verbessern kann.",
+ "Upload Pretrained Model": "Vortrainiertes Modell hochladen",
+ "Refresh Custom Pretraineds": "Aktualisiere benutzerdefinierte vortrainierte",
+ "Pretrained Custom Settings": "Vortrainierte benutzerdefinierte Einstellungen",
+ "The file you dropped is not a valid pretrained file. Please try again.": "Die Datei, die Sie abgelegt haben, ist keine gültige vortrainierte Datei. Bitte versuchen Sie es erneut.",
+ "Click the refresh button to see the pretrained file in the dropdown menu.": "Klicken Sie auf die Schaltfläche \"Aktualisieren\", um die vortrainierte Datei im Dropdown-Menü anzuzeigen.",
+ "Pretrained G Path": "Benutzerdefinierter vortrainierter G Pfad",
+ "Pretrained D Path": "Benutzerdefinierter vortrainierter D Pfad",
+ "GPU Settings": "GPU-Einstellungen",
+ "Sets advanced GPU settings, recommended for users with better GPU architecture.": "Legt erweiterte GPU-Einstellungen fest, die für Benutzer mit besserer GPU-Architektur empfohlen werden.",
+ "GPU Custom Settings": "Benutzerdefinierte GPU-Einstellungen",
+ "GPU Number": "GPU-Nummer",
+ "0 to ∞ separated by -": "0 bis ∞ getrennt durch -",
+ "GPU Information": "GPU-Informationen",
+ "Pitch Guidance": "Tonhöhen-Führung",
+ "By employing pitch guidance, it becomes feasible to mirror the intonation of the original voice, including its pitch. This feature is particularly valuable for singing and other scenarios where preserving the original melody or pitch pattern is essential.": "Durch den Einsatz von Tonhöhenführung wird es möglich, die Intonation der Originalstimme, einschließlich ihrer Tonhöhe, zu spiegeln. Diese Funktion ist besonders wertvoll für das Singen und andere Szenarien, in denen die Beibehaltung der ursprünglichen Melodie oder des Tonhöhenmusters unerlässlich ist.",
+ "Utilize pretrained models when training your own. This approach reduces training duration and enhances overall quality.": "Verwenden Sie vortrainierte Modelle, wenn Sie Ihre eigenen trainieren. Dieser Ansatz verkürzt die Schulungsdauer und verbessert die Gesamtqualität.",
+ "Extract Features": "Extrahieren von Merkmalen",
+ "Start Training": "Training starten",
+ "Generate Index": "Index generieren",
+ "Voice Model": "Sprach-Modell",
+ "Select the voice model to use for the conversion.": "Wählen Sie das Sprachmodell aus, das für die Konvertierung verwendet werden soll.",
+ "Index File": "Index-Datei",
+ "Select the index file to use for the conversion.": "Wählen Sie die Indexdatei aus, die für die Konvertierung verwendet werden soll.",
+ "Refresh": "Aktualisieren",
+ "Unload Voice": "Stimme entladen",
+ "Single": "Einzeln",
+ "Upload Audio": "Audio hochladen",
+ "Select Audio": "Wählen Sie ein Audio",
+ "Select the audio to convert.": "Wählen Sie das zu konvertierende Audio aus.",
+ "Advanced Settings": "Erweiterte Einstellungen",
+ "Clear Outputs (Deletes all audios in assets/audios)": "Ausgaben löschen (Löscht alle Audios in assets/audios)",
+ "Custom Output Path": "Benutzerdefinierter Ausgabepfad",
+ "Output Path": "Ausgabepfad",
+ "The path where the output audio will be saved, by default in assets/audios/output.wav": "Der Pfad, in dem das Ausgabeaudio gespeichert wird, standardmäßig in assets/audios/output.wav",
+ "Split Audio": "Audio aufteilen",
+ "Split the audio into chunks for inference to obtain better results in some cases.": "Teilen Sie die Audiodaten für Rückschlüsse in Blöcke auf, um in einigen Fällen bessere Ergebnisse zu erzielen.",
+ "Autotune": "Autotune",
+ "Apply a soft autotune to your inferences, recommended for singing conversions.": "Wenden Sie ein sanftes Autotune auf Ihre Inferenzen an, das für Gesangskonvertierungen empfohlen wird.",
+ "Clean Audio": "Audio bereinigen",
+ "Clean your audio output using noise detection algorithms, recommended for speaking audios.": "Bereinigen Sie Ihre Audioausgabe mithilfe von Rauscherkennungsalgorithmen, die für gesprochene Audios empfohlen werden.",
+ "Clean Strength": "Bereinigungsstärke",
+ "Set the clean-up level to the audio you want, the more you increase it the more it will clean up, but it is possible that the audio will be more compressed.": "Stellen Sie den Bereinigungsstärke auf den gewünschten Wert ein, je mehr Sie ihn erhöhen, desto mehr wird bereinigt, aber es ist möglich, dass der Ton stärker komprimiert wird.",
+ "Pitch": "Tonhöhe",
+ "Set the pitch of the audio, the higher the value, the higher the pitch.": "Stellen Sie die Tonhöhe des Audios ein, je höher der Wert, desto höher die Tonhöhe.",
+ "Filter Radius": "Filter-Radius",
+ "If the number is greater than or equal to three, employing median filtering on the collected tone results has the potential to decrease respiration.": "Wenn die Zahl größer oder gleich drei ist, kann die Verwendung einer Medianfilterung für die gesammelten Tonergebnisse die Atmung verringern.",
+ "Search Feature Ratio": "Such-Merkmal-Verhältnis",
+ "Influence exerted by the index file; a higher value corresponds to greater influence. However, opting for lower values can help mitigate artifacts present in the audio.": "Einfluss, der von der Indexdatei ausgeübt wird; Ein höherer Wert entspricht einem größeren Einfluss. Wenn Sie sich jedoch für niedrigere Werte entscheiden, können Sie Artefakte im Audiomaterial abschwächen.",
+ "Volume Envelope": "Lautstärke-Hüllkurve",
+ "Substitute or blend with the volume envelope of the output. The closer the ratio is to 1, the more the output envelope is employed.": "Ersetzen oder überblenden Sie die Lautstärke-Hüllkurve des Ausgangs. Je näher das Verhältnis an 1 liegt, desto mehr wird die Ausgangshüllkurve verwendet.",
+ "Protect Voiceless Consonants": "Schützen Sie stimmlose Konsonanten",
+ "Safeguard distinct consonants and breathing sounds to prevent electro-acoustic tearing and other artifacts. Pulling the parameter to its maximum value of 0.5 offers comprehensive protection. However, reducing this value might decrease the extent of protection while potentially mitigating the indexing effect.": "Schützen Sie eindeutige Konsonanten und Atemgeräusche, um elektroakustisches Reißen und andere Artefakte zu vermeiden. Das Ziehen des Parameters auf den Maximalwert von 0,5 bietet einen umfassenden Schutz. Das Verringern dieses Werts kann jedoch den Umfang des Schutzes verringern und gleichzeitig den Indizierungseffekt möglicherweise abschwächen.",
+ "Pitch extraction algorithm": "Algorithmus zur Tonhöhenextraktion",
+ "Pitch extraction algorithm to use for the audio conversion. The default algorithm is rmvpe, which is recommended for most cases.": "Tonhöhenextraktionsalgorithmus, der für die Audiokonvertierung verwendet werden soll. Der Standardalgorithmus ist rmvpe, der in den meisten Fällen empfohlen wird.",
+ "Convert": "Umwandeln",
+ "Export Audio": "Audio exportieren",
+ "Batch": "Charge",
+ "Input Folder": "Eingabe-Ordner",
+ "Select the folder containing the audios to convert.": "Wählen Sie den Ordner aus, der die zu konvertierenden Audios enthält.",
+ "Enter input path": "Eingabepfad eingeben",
+ "Output Folder": "Ausgabe-Ordner",
+ "Select the folder where the output audios will be saved.": "Wählen Sie den Ordner aus, in dem die ausgegebenen Audios gespeichert werden sollen.",
+ "Enter output path": "Ausgabepfad eingeben",
+ "Get information about the audio": "Abrufen von Informationen zum Audio",
+ "Information about the audio file": "Informationen zur Audiodatei",
+ "Waiting for information...": "Warten auf Informationen...",
+ "## Voice Blender": "## Voice Blender",
+ "Select two voice models, set your desired blend percentage, and blend them into an entirely new voice.": "Wählen Sie zwei Stimmmodelle aus, legen Sie den gewünschten Überblendungsprozentsatz fest und mischen Sie sie zu einer völlig neuen Stimme.",
+ "Voice Blender": "Voice Blender",
+ "Drag and drop your model here": "Ziehen Sie Ihr Modell per Drag & Drop hierher",
+ "You can also use a custom path.": "Sie können auch einen benutzerdefinierten Pfad verwenden.",
+ "Blend Ratio": "Mischungsverhältnis",
+ "Adjusting the position more towards one side or the other will make the model more similar to the first or second.": "Wenn Sie die Position mehr auf die eine oder andere Seite anpassen, wird das Modell dem ersten oder zweiten ähnlicher.",
+ "Fusion": "Verschmelzen",
+ "Path to Model": "Pfad zum Modell",
+ "Enter path to model": "Pfad zum Modell eingeben",
+ "Model information to be placed": "Zu platzierende Modellinformationen",
+ "Inroduce the model information": "Einfügen der Modellinformationen",
+ "The information to be placed in the model (You can leave it blank or put anything).": "Die Informationen, die in das Modell eingefügt werden sollen (Sie können das Feld leer lassen oder etwas anderes einfügen).",
+ "View model information": "Anzeigen von Modellinformationen",
+ "Introduce the model pth path": "Einfügen des pth Pfad des Modells",
+ "View": "Ansehen",
+ "Model extraction": "Modell-Extraktion",
+ "Model conversion": "Modell-Konvertierung",
+ "Pth file": "Pth-Datei",
+ "Output of the pth file": "Ausgabe der pth-Datei",
+ "# How to Report an Issue on GitHub": "# So melden Sie ein Problem auf GitHub",
+ "1. Click on the 'Record Screen' button below to start recording the issue you are experiencing.": "1. Klicken Sie unten auf die Schaltfläche \"Bildschirm aufzeichnen\", um mit der Aufzeichnung des aufgetretenen Problems zu beginnen.",
+ "2. Once you have finished recording the issue, click on the 'Stop Recording' button (the same button, but the label changes depending on whether you are actively recording or not).": "2. Wenn Sie mit der Aufnahme des Problems fertig sind, klicken Sie auf die Schaltfläche \"Aufnahme beenden\" (dieselbe Schaltfläche, aber die Beschriftung ändert sich, je nachdem, ob Sie aktiv aufnehmen oder nicht).",
+ "3. Go to [GitHub Issues](https://github.com/IAHispano/Applio/issues) and click on the 'New Issue' button.": "3. Gehen Sie zu [GitHub Issues](https://github.com/IAHispano/Applio/issues) und klicken Sie auf die Schaltfläche \"New Issue\".",
+ "4. Complete the provided issue template, ensuring to include details as needed, and utilize the assets section to upload the recorded file from the previous step.": "4. Füllen Sie die bereitgestellte Problemvorlage aus, stellen Sie sicher, dass Sie die erforderlichen Details angeben, und verwenden Sie den Abschnitt \"Assets\", um die aufgezeichnete Datei aus dem vorherigen Schritt hochzuladen.",
+ "Record Screen": "Bildschirm aufzeichnen",
+ "Record": "Aufzeichnen",
+ "Stop Recording": "Aufzeichnung beenden",
+ "Introduce the model .pth path": "Einfügen des .pth Pfad des Modells",
+ "See Model Information": "Siehe Modellinformationen",
+ "## Download Model": "## Modell herunterladen",
+ "Model Link": "Modell-Link",
+ "Introduce the model link": "Einfügen des Modell-Links",
+ "Download Model": "Modell herunterladen",
+ "## Drop files": "## Dateien ablegen",
+ "Drag your .pth file and .index file into this space. Drag one and then the other.": "Ziehen Sie Ihre .pth und .index Datei in diesen Bereich. Ziehen Sie das eine und dann das andere.",
+ "TTS Voices": "TTS-Stimmen",
+ "Select the TTS voice to use for the conversion.": "Wählen Sie die TTS-Stimme aus, die für die Konvertierung verwendet werden soll.",
+ "Text to Synthesize": "Zu synthetisierender Text",
+ "Enter the text to synthesize.": "Geben Sie den zu synthetisierenden Text ein.",
+ "Or you can upload a .txt file": "Oder Sie können eine .txt Datei hochladen",
+ "Enter text to synthesize": "Geben Sie den zu synthetisierenden Text ein",
+ "Output Path for TTS Audio": "Ausgabepfad für das TTS-Audio",
+ "Output Path for RVC Audio": "Ausgabepfad für das RVC-Audio",
+ "Enable Applio integration with Discord presence": "Aktivieren Sie die Applio-Integration mit Discord-Presence",
+ "It will activate the possibility of displaying the current Applio activity in Discord.": "Es aktiviert die Möglichkeit, die aktuelle Applio-Aktivität in Discord anzuzeigen.",
+ "Enable Applio integration with applio.org/models using flask": "Aktivieren der Applio-Integration mit applio.org/models mithilfe von flask",
+ "It will activate the possibility of downloading models with a click from the website.": "Es aktiviert die Möglichkeit, Modelle mit einem Klick von der Website herunterzuladen.",
+ "Theme": "Design",
+ "Select the theme you want to use. (Requires restarting Applio)": "Wählen Sie das Design aus, das Sie verwenden möchten. (Erfordert einen Neustart von Applio)",
+ "Language": "Sprache",
+ "Select the language you want to use. (Requires restarting Applio)": "Wählen Sie die Sprache aus, die Sie verwenden möchten. (Erfordert einen Neustart von Applio)",
+ "Plugin Installer": "Plugin-Installer",
+ "Drag your plugin.zip to install it": "Ziehen Sie Ihre plugin.zip, um sie zu installieren",
+ "Version Checker": "Versions-Checker",
+ "Check which version of Applio is the latest to see if you need to update.": "Überprüfen Sie, welche Version von Applio die neueste ist, um zu sehen, ob Sie ein Update benötigen.",
+ "Check for updates": "Nach Updates suchen"
+}
diff --git a/assets/i18n/languages/en_US.json b/assets/i18n/languages/en_US.json
new file mode 100644
index 0000000000000000000000000000000000000000..50f16502c26f80f8f8eb9fb7b0a977deeafedb94
--- /dev/null
+++ b/assets/i18n/languages/en_US.json
@@ -0,0 +1,308 @@
+{
+ "VITS-based Voice Conversion focused on simplicity, quality and performance.": "VITS-based Voice Conversion focused on simplicity, quality and performance.",
+ "This section contains some extra utilities that often may be in experimental phases.": "This section contains some extra utilities that often may be in experimental phases.",
+ "Output Information": "Output Information",
+ "The output information will be displayed here.": "The output information will be displayed here.",
+ "Inference": "Inference",
+ "Train": "Train",
+ "Extra": "Extra",
+ "Merge Audios": "Merge Audios",
+ "Processing": "Processing",
+ "Audio Analyzer": "Audio Analyzer",
+ "Model Information": "Model Information",
+ "Plugins": "Plugins",
+ "Download": "Download",
+ "Report a Bug": "Report a Bug",
+ "Settings": "Settings",
+ "Preprocess": "Preprocess",
+ "Audio cutting": "Audio cutting",
+ "It's recommended to deactivate this option if your dataset has already been processed.": "It's recommended to deactivate this option if your dataset has already been processed.",
+ "Process effects": "Process effects",
+ "Model Name": "Model Name",
+ "Name of the new model.": "Name of the new model.",
+ "Enter model name": "Enter model name",
+ "Dataset Path": "Dataset Path",
+ "Path to the dataset folder.": "Path to the dataset folder.",
+ "Refresh Datasets": "Refresh Datasets",
+ "Dataset Creator": "Dataset Creator",
+ "Dataset Name": "Dataset Name",
+ "Name of the new dataset.": "Name of the new dataset.",
+ "Enter dataset name": "Enter dataset name",
+ "Upload Audio Dataset": "Upload Audio Dataset",
+ "The audio file has been successfully added to the dataset. Please click the preprocess button.": "The audio file has been successfully added to the dataset. Please click the preprocess button.",
+ "Enter dataset path": "Enter dataset path",
+ "Sampling Rate": "Sampling Rate",
+ "The sampling rate of the audio files.": "The sampling rate of the audio files.",
+ "Model Architecture": "Model Architecture",
+ "Version of the model architecture.": "Version of the model architecture.",
+ "Preprocess Dataset": "Preprocess Dataset",
+ "Embedder Model": "Embedder Model",
+ "Model used for learning speaker embedding.": "Model used for learning speaker embedding.",
+ "Extract": "Extract",
+ "Hop Length": "Hop Length",
+ "Denotes the duration it takes for the system to transition to a significant pitch change. Smaller hop lengths require more time for inference but tend to yield higher pitch accuracy.": "Denotes the duration it takes for the system to transition to a significant pitch change. Smaller hop lengths require more time for inference but tend to yield higher pitch accuracy.",
+ "Batch Size": "Batch Size",
+ "It's advisable to align it with the available VRAM of your GPU. A setting of 4 offers improved accuracy but slower processing, while 8 provides faster and standard results.": "It's advisable to align it with the available VRAM of your GPU. A setting of 4 offers improved accuracy but slower processing, while 8 provides faster and standard results.",
+ "Save Every Epoch": "Save Every Epoch",
+ "Determine at how many epochs the model will saved at.": "Determine at how many epochs the model will saved at.",
+ "Total Epoch": "Total Epoch",
+ "Specifies the overall quantity of epochs for the model training process.": "Specifies the overall quantity of epochs for the model training process.",
+ "Pretrained": "Pretrained",
+ "Save Only Latest": "Save Only Latest",
+ "Enabling this setting will result in the G and D files saving only their most recent versions, effectively conserving storage space.": "Enabling this setting will result in the G and D files saving only their most recent versions, effectively conserving storage space.",
+ "Save Every Weights": "Save Every Weights",
+ "This setting enables you to save the weights of the model at the conclusion of each epoch.": "This setting enables you to save the weights of the model at the conclusion of each epoch.",
+ "Custom Pretrained": "Custom Pretrained",
+ "Utilizing custom pretrained models can lead to superior results, as selecting the most suitable pretrained models tailored to the specific use case can significantly enhance performance.": "Utilizing custom pretrained models can lead to superior results, as selecting the most suitable pretrained models tailored to the specific use case can significantly enhance performance.",
+ "Upload Pretrained Model": "Upload Pretrained Model",
+ "Refresh Custom Pretraineds": "Refresh Custom Pretraineds",
+ "Pretrained Custom Settings": "Pretrained Custom Settings",
+ "The file you dropped is not a valid pretrained file. Please try again.": "The file you dropped is not a valid pretrained file. Please try again.",
+ "Click the refresh button to see the pretrained file in the dropdown menu.": "Click the refresh button to see the pretrained file in the dropdown menu.",
+ "Pretrained G Path": "Custom Pretrained G",
+ "Pretrained D Path": "Custom Pretrained D",
+ "GPU Settings": "GPU Settings",
+ "Sets advanced GPU settings, recommended for users with better GPU architecture.": "Sets advanced GPU settings, recommended for users with better GPU architecture.",
+ "GPU Custom Settings": "GPU Custom Settings",
+ "GPU Number": "GPU Number",
+ "0 to ∞ separated by -": "0 to ∞ separated by -",
+ "The GPU information will be displayed here.": "The GPU information will be displayed here.",
+ "Specify the number of GPUs you wish to utilize for preprocess by entering them separated by hyphens (-). At the moment, using multi-gpu will not have a significant effect.": "Specify the number of GPUs you wish to utilize for preprocess by entering them separated by hyphens (-). At the moment, using multi-gpu will not have a significant effect.",
+ "Specify the number of GPUs you wish to utilize for extracting by entering them separated by hyphens (-).": "Specify the number of GPUs you wish to utilize for extracting by entering them separated by hyphens (-).",
+ "The number of CPU cores to use in the preprocess. The default setting are your cpu cores, which is recommended for most cases.": "The number of CPU cores to use in the preprocess. The default setting are your cpu cores, which is recommended for most cases.",
+ "The number of CPU cores to use in the extraction process. The default setting are your cpu cores, which is recommended for most cases.": "The number of CPU cores to use in the extraction process. The default setting are your cpu cores, which is recommended for most cases.",
+ "GPU Information": "GPU Information",
+ "Pitch Guidance": "Pitch Guidance",
+ "By employing pitch guidance, it becomes feasible to mirror the intonation of the original voice, including its pitch. This feature is particularly valuable for singing and other scenarios where preserving the original melody or pitch pattern is essential.": "By employing pitch guidance, it becomes feasible to mirror the intonation of the original voice, including its pitch. This feature is particularly valuable for singing and other scenarios where preserving the original melody or pitch pattern is essential.",
+ "Utilize pretrained models when training your own. This approach reduces training duration and enhances overall quality.": "Utilize pretrained models when training your own. This approach reduces training duration and enhances overall quality.",
+ "Extract Features": "Extract Features",
+ "We prioritize running the model extraction on the GPU for faster performance. If you prefer to use the CPU, simply leave the GPU field blank.": "We prioritize running the model extraction on the GPU for faster performance. If you prefer to use the CPU, simply leave the GPU field blank.",
+ "We prioritize running the model preprocessing on the GPU for faster performance. If you prefer to use the CPU, simply leave the GPU field blank.": "We prioritize running the model preprocessing on the GPU for faster performance. If you prefer to use the CPU, simply leave the GPU field blank.",
+ "Overtraining Detector": "Overtraining Detector",
+ "Detect overtraining to prevent the model from learning the training data too well and losing the ability to generalize to new data.": "Detect overtraining to prevent the model from learning the training data too well and losing the ability to generalize to new data.",
+ "Overtraining Detector Settings": "Overtraining Detector Settings",
+ "Overtraining Threshold": "Overtraining Threshold",
+ "Set the maximum number of epochs you want your model to stop training if no improvement is detected.": "Set the maximum number of epochs you want your model to stop training if no improvement is detected.",
+ "Sync Graph": "Sync Graph",
+ "Synchronize the graph of the tensorbaord. Only enable this setting if you are training a new model.": "Synchronize the graph of the tensorbaord. Only enable this setting if you are training a new model.",
+ "Start Training": "Start Training",
+ "Stop Training": "Stop Training",
+ "Generate Index": "Generate Index",
+ "Export Model": "Export Model",
+ "The button 'Upload' is only for google colab: Uploads the exported files to the ApplioExported folder in your Google Drive.": "The button 'Upload' is only for google colab: Uploads the exported files to the ApplioExported folder in your Google Drive.",
+ "Exported Pth file": "Exported Pth file",
+ "Exported Index file": "Exported Index file",
+ "Select the pth file to be exported": "Select the pth file to be exported",
+ "Select the index file to be exported": "Select the index file to be exported",
+ "Upload": "Upload",
+ "Voice Model": "Voice Model",
+ "Select the voice model to use for the conversion.": "Select the voice model to use for the conversion.",
+ "Index File": "Index File",
+ "Select the index file to use for the conversion.": "Select the index file to use for the conversion.",
+ "Refresh": "Refresh",
+ "Unload Voice": "Unload Voice",
+ "Single": "Single",
+ "Upload Audio": "Upload Audio",
+ "Select Audio": "Select Audio",
+ "Select the audio to convert.": "Select the audio to convert.",
+ "Advanced Settings": "Advanced Settings",
+ "Clear Outputs (Deletes all audios in assets/audios)": "Clear Outputs (Deletes all audios in assets/audios)",
+ "Custom Output Path": "Custom Output Path",
+ "Output Path": "Output Path",
+ "The path where the output audio will be saved, by default in assets/audios/output.wav": "The path where the output audio will be saved, by default in assets/audios/output.wav",
+ "Split Audio": "Split Audio",
+ "Split the audio into chunks for inference to obtain better results in some cases.": "Split the audio into chunks for inference to obtain better results in some cases.",
+ "Autotune": "Autotune",
+ "Apply a soft autotune to your inferences, recommended for singing conversions.": "Apply a soft autotune to your inferences, recommended for singing conversions.",
+ "Clean Audio": "Clean Audio",
+ "Clean your audio output using noise detection algorithms, recommended for speaking audios.": "Clean your audio output using noise detection algorithms, recommended for speaking audios.",
+ "Clean Strength": "Clean Strength",
+ "Upscale Audio": "Upscale Audio",
+ "Upscale the audio to a higher quality, recommended for low-quality audios. (It could take longer to process the audio)": "Upscale the audio to a higher quality, recommended for low-quality audios. (It could take longer to process the audio)",
+ "Set the clean-up level to the audio you want, the more you increase it the more it will clean up, but it is possible that the audio will be more compressed.": "Set the clean-up level to the audio you want, the more you increase it the more it will clean up, but it is possible that the audio will be more compressed.",
+ "Formant Shifting": "Formant Shifting",
+ "Enable formant shifting. Used for male to female and vice-versa convertions.": "Enable formant shifting. Used for male to female and vice-versa convertions.",
+ "Browse presets for formanting": "Browse presets for formanting",
+ "Presets are located in /assets/formant_shift folder": "Presets are located in /assets/formant_shift folder",
+ "Default value is 1.0": "Default value is 1.0",
+ "Quefrency for formant shifting": "Quefrency for formant shifting",
+ "Timbre for formant shifting": "Timbre for formant shifting",
+ "Pitch": "Pitch",
+ "Set the pitch of the audio, the higher the value, the higher the pitch.": "Set the pitch of the audio, the higher the value, the higher the pitch.",
+ "Filter Radius": "Filter Radius",
+ "If the number is greater than or equal to three, employing median filtering on the collected tone results has the potential to decrease respiration.": "If the number is greater than or equal to three, employing median filtering on the collected tone results has the potential to decrease respiration.",
+ "Search Feature Ratio": "Search Feature Ratio",
+ "Influence exerted by the index file; a higher value corresponds to greater influence. However, opting for lower values can help mitigate artifacts present in the audio.": "Influence exerted by the index file; a higher value corresponds to greater influence. However, opting for lower values can help mitigate artifacts present in the audio.",
+ "Volume Envelope": "Volume Envelope",
+ "Substitute or blend with the volume envelope of the output. The closer the ratio is to 1, the more the output envelope is employed.": "Substitute or blend with the volume envelope of the output. The closer the ratio is to 1, the more the output envelope is employed.",
+ "Protect Voiceless Consonants": "Protect Voiceless Consonants",
+ "Safeguard distinct consonants and breathing sounds to prevent electro-acoustic tearing and other artifacts. Pulling the parameter to its maximum value of 0.5 offers comprehensive protection. However, reducing this value might decrease the extent of protection while potentially mitigating the indexing effect.": "Safeguard distinct consonants and breathing sounds to prevent electro-acoustic tearing and other artifacts. Pulling the parameter to its maximum value of 0.5 offers comprehensive protection. However, reducing this value might decrease the extent of protection while potentially mitigating the indexing effect.",
+ "Pitch extraction algorithm": "Pitch extraction algorithm",
+ "Pitch extraction algorithm to use for the audio conversion. The default algorithm is rmvpe, which is recommended for most cases.": "Pitch extraction algorithm to use for the audio conversion. The default algorithm is rmvpe, which is recommended for most cases.",
+ "Convert": "Convert",
+ "Export Audio": "Export Audio",
+ "Batch": "Batch",
+ "Input Folder": "Input Folder",
+ "Select the folder containing the audios to convert.": "Select the folder containing the audios to convert.",
+ "Enter input path": "Enter input path",
+ "Output Folder": "Output Folder",
+ "Select the folder where the output audios will be saved.": "Select the folder where the output audios will be saved.",
+ "Enter output path": "Enter output path",
+ "Get information about the audio": "Get information about the audio",
+ "## Voice Blender": "## Voice Blender",
+ "Select two voice models, set your desired blend percentage, and blend them into an entirely new voice.": "Select two voice models, set your desired blend percentage, and blend them into an entirely new voice.",
+ "Voice Blender": "Voice Blender",
+ "Drag and drop your model here": "Drag and drop your model here",
+ "You can also use a custom path.": "You can also use a custom path.",
+ "Blend Ratio": "Blend Ratio",
+ "Adjusting the position more towards one side or the other will make the model more similar to the first or second.": "Adjusting the position more towards one side or the other will make the model more similar to the first or second.",
+ "Fusion": "Fusion",
+ "Path to Model": "Path to Model",
+ "Enter path to model": "Enter path to model",
+ "Model information to be placed": "Model information to be placed",
+ "Inroduce the model information": "Inroduce the model information",
+ "The information to be placed in the model (You can leave it blank or put anything).": "The information to be placed in the model (You can leave it blank or put anything).",
+ "View model information": "View model information",
+ "Introduce the model pth path": "Introduce the model pth path",
+ "View": "View",
+ "Model extraction": "Model extraction",
+ "Model conversion": "Model conversion",
+ "Pth file": "Pth file",
+ "Output of the pth file": "Output of the pth file",
+ "Extract F0 Curve": "Extract F0 Curve",
+ "The f0 curve represents the variations in the base frequency of a voice over time, showing how pitch rises and falls.": "The f0 curve represents the variations in the base frequency of a voice over time, showing how pitch rises and falls.",
+ "# How to Report an Issue on GitHub": "# How to Report an Issue on GitHub",
+ "1. Click on the 'Record Screen' button below to start recording the issue you are experiencing.": "1. Click on the 'Record Screen' button below to start recording the issue you are experiencing.",
+ "2. Once you have finished recording the issue, click on the 'Stop Recording' button (the same button, but the label changes depending on whether you are actively recording or not).": "2. Once you have finished recording the issue, click on the 'Stop Recording' button (the same button, but the label changes depending on whether you are actively recording or not).",
+ "3. Go to [GitHub Issues](https://github.com/IAHispano/Applio/issues) and click on the 'New Issue' button.": "3. Go to [GitHub Issues](https://github.com/IAHispano/Applio/issues) and click on the 'New Issue' button.",
+ "4. Complete the provided issue template, ensuring to include details as needed, and utilize the assets section to upload the recorded file from the previous step.": "4. Complete the provided issue template, ensuring to include details as needed, and utilize the assets section to upload the recorded file from the previous step.",
+ "Record Screen": "Record Screen",
+ "Record": "Record",
+ "Stop Recording": "Stop Recording",
+ "Introduce the model .pth path": "Introduce the model .pth path",
+ "See Model Information": "See Model Information",
+ "## Download Model": "## Download Model",
+ "Model Link": "Model Link",
+ "Introduce the model link": "Introduce the model link",
+ "Download Model": "Download Model",
+ "## Drop files": "## Drop files",
+ "Drag your .pth file and .index file into this space. Drag one and then the other.": "Drag your .pth file and .index file into this space. Drag one and then the other.",
+ "## Search Model": "## Search Model",
+ "Search": "Search",
+ "Introduce the model name to search.": "Introduce the model name to search.",
+ "We couldn't find models by that name.": "We couldn't find models by that name.",
+ "## Download Pretrained Models": "## Download Pretrained Models",
+ "Select the pretrained model you want to download.": "Select the pretrained model you want to download.",
+ "And select the sampling rate": "And select the sampling rate.",
+ "TTS Voices": "TTS Voices",
+ "TTS Speed": "TTS Speed",
+ "Increase or decrease TTS speed.": "Increase or decrease TTS speed.",
+ "Select the TTS voice to use for the conversion.": "Select the TTS voice to use for the conversion.",
+ "Text to Synthesize": "Text to Synthesize",
+ "Enter the text to synthesize.": "Enter the text to synthesize.",
+ "Or you can upload a .txt file": "Or you can upload a .txt file",
+ "Enter text to synthesize": "Enter text to synthesize",
+ "Output Path for TTS Audio": "Output Path for TTS Audio",
+ "Output Path for RVC Audio": "Output Path for RVC Audio",
+ "Enable Applio integration with Discord presence": "Enable Applio integration with Discord presence",
+ "It will activate the possibility of displaying the current Applio activity in Discord.": "It will activate the possibility of displaying the current Applio activity in Discord.",
+ "Enable Applio integration with applio.org/models using flask": "Enable Applio integration with applio.org/models using flask",
+ "It will activate the possibility of downloading models with a click from the website.": "It will activate the possibility of downloading models with a click from the website.",
+ "Enable fake GPU": "Enable fake GPU",
+ "Training is currently unsupported due to the absence of a GPU. To activate the training tab, navigate to the settings tab and enable the 'Fake GPU' option.": "Training is currently unsupported due to the absence of a GPU. To activate the training tab, navigate to the settings tab and enable the 'Fake GPU' option.",
+ "Activates the train tab. However, please note that this device lacks GPU capabilities, hence training is not supported. This option is only for testing purposes. (This option will restart Applio)": "Activates the train tab. However, please note that this device lacks GPU capabilities, hence training is not supported. This option is only for testing purposes. (This option will restart Applio)",
+ "Theme": "Theme",
+ "Select the theme you want to use. (Requires restarting Applio)": "Select the theme you want to use. (Requires restarting Applio)",
+ "Language": "Language",
+ "Select the language you want to use. (Requires restarting Applio)": "Select the language you want to use. (Requires restarting Applio)",
+ "Precision": "Precision",
+ "Select the precision you want to use for training and inference.": "Select the precision you want to use for training and inference.",
+ "Update precision": "Update precision",
+ "Plugin Installer": "Plugin Installer",
+ "Drag your plugin.zip to install it": "Drag your plugin.zip to install it",
+ "Version Checker": "Version Checker",
+ "Check which version of Applio is the latest to see if you need to update.": "Check which version of Applio is the latest to see if you need to update.",
+ "Check for updates": "Check for updates",
+ "Post-Process": "Post-Process",
+ "Post-process the audio to apply effects to the output.": "Post-process the audio to apply effects to the output.",
+ "Reverb": "Reverb",
+ "Apply reverb to the audio.": "Apply reverb to the audio.",
+ "Reverb Room Size": "Reverb Room Size",
+ "Set the room size of the reverb.": "Set the room size of the reverb.",
+ "Reverb Damping": "Reverb Damping",
+ "Set the damping of the reverb.": "Set the damping of the reverb.",
+ "Reverb Wet Gain": "Reverb Wet Gain",
+ "Set the wet gain of the reverb.": "Set the wet gain of the reverb.",
+ "Reverb Dry Gain": "Reverb Dry Gain",
+ "Set the dry gain of the reverb.": "Set the dry gain of the reverb.",
+ "Reverb Width": "Reverb Width",
+ "Set the width of the reverb.": "Set the width of the reverb.",
+ "Reverb Freeze Mode": "Reverb Freeze Mode",
+ "Set the freeze mode of the reverb.": "Set the freeze mode of the reverb.",
+ "Pitch Shift": "Pitch Shift",
+ "Apply pitch shift to the audio.": "Apply pitch shift to the audio.",
+ "Pitch Shift Semitones": "Pitch Shift Semitones",
+ "Set the pitch shift semitones.": "Set the pitch shift semitones.",
+ "Limiter": "Limiter",
+ "Apply limiter to the audio.": "Apply limiter to the audio.",
+ "Limiter Threshold dB": "Limiter Threshold dB",
+ "Set the limiter threshold dB.": "Set the limiter threshold dB.",
+ "Limiter Release Time": "Limiter Release Time",
+ "Set the limiter release time.": "Set the limiter release time.",
+ "Gain": "Gain",
+ "Apply gain to the audio.": "Apply gain to the audio.",
+ "Gain dB": "Gain dB",
+ "Set the gain dB.": "Set the gain dB.",
+ "Distortion": "Distortion",
+ "Apply distortion to the audio.": "Apply distortion to the audio.",
+ "Distortion Gain": "Distortion Gain",
+ "Set the distortion gain.": "Set the distortion gain.",
+ "Chorus": "Chorus",
+ "Apply chorus to the audio.": "Apply chorus to the audio.",
+ "Chorus Rate Hz": "Chorus Rate Hz",
+ "Set the chorus rate Hz.": "Set the chorus rate Hz.",
+ "Chorus Depth": "Chorus Depth",
+ "Set the chorus depth.": "Set the chorus depth.",
+ "Chorus Center Delay ms": "Chorus Center Delay ms",
+ "Set the chorus center delay ms.": "Set the chorus center delay ms.",
+ "Chorus Feedback": "Chorus Feedback",
+ "Set the chorus feedback.": "Set the chorus feedback.",
+ "Chorus Mix": "Chorus Mix",
+ "Set the chorus mix.": "Set the chorus mix.",
+ "Bitcrush": "Bitcrush",
+ "Apply bitcrush to the audio.": "Apply bitcrush to the audio.",
+ "Bitcrush Bit Depth": "Bitcrush Bit Depth",
+ "Set the bitcrush bit depth.": "Set the bitcrush bit depth.",
+ "Clipping": "Clipping",
+ "Apply clipping to the audio.": "Apply clipping to the audio.",
+ "Clipping Threshold": "Clipping Threshold",
+ "Set the clipping threshold.": "Set the clipping threshold.",
+ "Compressor": "Compressor",
+ "Apply compressor to the audio.": "Apply compressor to the audio.",
+ "Compressor Threshold dB": "Compressor Threshold dB",
+ "Set the compressor threshold dB.": "Set the compressor threshold dB.",
+ "Compressor Ratio": "Compressor Ratio",
+ "Set the compressor ratio.": "Set the compressor ratio.",
+ "Compressor Attack ms": "Compressor Attack ms",
+ "Set the compressor attack ms.": "Set the compressor attack ms.",
+ "Compressor Release ms": "Compressor Release ms",
+ "Set the compressor release ms.": "Set the compressor release ms.",
+ "Delay": "Delay",
+ "Apply delay to the audio.": "Apply delay to the audio.",
+ "Delay Seconds": "Delay Seconds",
+ "Set the delay seconds.": "Set the delay seconds.",
+ "Delay Feedback": "Delay Feedback",
+ "Set the delay feedback.": "Set the delay feedback.",
+ "Delay Mix": "Delay Mix",
+ "Set the delay mix.": "Set the delay mix.",
+ "Custom Embedder": "Custom Embedder",
+ "Select Custom Embedder": "Select Custom Embedder",
+ "Refresh embedders": "Refresh embedders",
+ "Folder Name": "Folder Name",
+ "Upload .bin": "Upload .bin",
+ "Upload .json": "Upload .json",
+ "Move files to custom embedder folder": "Move files to custom embedder folder",
+ "model information": "model information",
+ "Model Creator": "Model Creator",
+ "Name of the model creator. (Default: Unknown)": "Name of the model creator. (Default: Unknown)"
+}
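
These locale files map English source strings (the JSON keys) to translated display strings (the values). The keys double as lookup identifiers, which is why they must stay byte-identical to the strings used in the UI code, typos included. As a rough illustration of how such a file might be consumed, the `I18n` class below is a hypothetical minimal sketch, not Applio's actual loader:

```python
import json
import os


class I18n:
    """Hypothetical minimal i18n loader: maps English source strings to
    translations, falling back to the source string when none exists."""

    def __init__(self, lang="es_ES", base_dir="assets/i18n/languages"):
        path = os.path.join(base_dir, f"{lang}.json")
        try:
            with open(path, encoding="utf-8") as f:
                self.table = json.load(f)
        except FileNotFoundError:
            self.table = {}  # unknown locale: display the English source strings

    def __call__(self, text: str) -> str:
        # Exact-match lookup: a key that differs by even one character
        # (e.g. a trailing period) silently falls back to English.
        return self.table.get(text, text)


# Usage sketch:
i18n = I18n("es_ES")
print(i18n("Output Information"))  # -> "Información de salida"
```
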
diff --git a/assets/i18n/languages/es_ES.json b/assets/i18n/languages/es_ES.json
new file mode 100644
index 0000000000000000000000000000000000000000..a980c48cf936d30d351453f9d9476031f3edb932
--- /dev/null
+++ b/assets/i18n/languages/es_ES.json
@@ -0,0 +1,206 @@
+{
+ "VITS-based Voice Conversion focused on simplicity, quality and performance.": "La herramienta de clonación de voz definitiva, meticulosamente optimizada para una potencia, modularidad y experiencia de uso inigualables.",
+ "This section contains some extra utilities that often may be in experimental phases.": "Esta sección contiene algunas utilidades adicionales que a menudo pueden estar en fases experimentales.",
+ "Output Information": "Información de salida",
+ "The output information will be displayed here.": "La información de salida se mostrará aquí.",
+ "Inference": "Inferencia",
+ "Train": "Entrenar",
+ "Extra": "Extra",
+ "Merge Audios": "Fusionar audios",
+ "Processing": "Procesamiento",
+ "Audio Analyzer": "Analizador de audio",
+ "Model Information": "Información del modelo",
+ "Plugins": "Complementos",
+ "Download": "Descargar",
+ "Report a Bug": "Informar de un error",
+ "Settings": "Configuración",
+ "Preprocess": "Preprocesar",
+ "Model Name": "Nombre del modelo",
+ "Name of the new model.": "Nombre del nuevo modelo.",
+ "Enter model name": "Introduzca el nombre del modelo",
+ "Dataset Path": "Ruta del dataset",
+ "Path to the dataset folder.": "Ruta de acceso al dataset.",
+ "Refresh Datasets": "Actualizar datasets",
+ "Dataset Creator": "Creador de datasets",
+ "Dataset Name": "Nombre del dataset",
+ "Name of the new dataset.": "Nombre del nuevo dataset.",
+ "Enter dataset name": "Introduzca el nombre del dataset",
+ "Upload Audio Dataset": "Cargar audio del dataset",
+ "The audio file has been successfully added to the dataset. Please click the preprocess button.": "El archivo de audio se ha agregado correctamente dataset. Haga clic en el botón de preprocesamiento.",
+ "Enter dataset path": "Introduzca la ruta del dataset",
+ "Sampling Rate": "Frecuencia de muestreo",
+ "The sampling rate of the audio files.": "La frecuencia de muestreo de los archivos de audio.",
+ "Model Architecture": "Versión RVC",
+ "Version of the model architecture.": "La versión RVC del modelo.",
+ "Preprocess Dataset": "Preprocesar dataset",
+ "Embedder Model": "Modelo de incrustación",
+ "Model used for learning speaker embedding.": "Modelo utilizado para aprender la incrustación del hablante.",
+ "Hop Length": "Longitud del salto",
+ "Denotes the duration it takes for the system to transition to a significant pitch change. Smaller hop lengths require more time for inference but tend to yield higher pitch accuracy.": "Denota el tiempo que tarda el sistema en realizar la transición a un cambio de tono significativo. Las longitudes de salto más pequeñas requieren más tiempo para la inferencia, pero tienden a producir una mayor precisión de tono.",
+ "Batch Size": "Tamaño del lote",
+ "It's advisable to align it with the available VRAM of your GPU. A setting of 4 offers improved accuracy but slower processing, while 8 provides faster and standard results.": "Es recomendable alinearlo con la VRAM disponible de tu GPU. Un ajuste de 4 ofrece una precisión mejorada pero un procesamiento más lento, mientras que 8 proporciona resultados más rápidos y estándar.",
+ "Save Every Epoch": "Frecuencia de guardado",
+ "Determine at how many epochs the model will saved at.": "Determine en cuántas épocas se guardará el modelo.",
+ "Total Epoch": "Épocas",
+ "Specifies the overall quantity of epochs for the model training process.": "Especifica la cantidad total de épocas para el proceso de entrenamiento del modelo.",
+ "Pretrained": "Preentrenado",
+ "Save Only Latest": "Guarde solo lo último",
+ "Enabling this setting will result in the G and D files saving only their most recent versions, effectively conserving storage space.": "Al habilitar esta configuración, los archivos G y D guardarán solo sus versiones más recientes, lo que ahorrará espacio de almacenamiento de manera efectiva.",
+ "Save Every Weights": "Ahorre todos los pesos",
+ "This setting enables you to save the weights of the model at the conclusion of each epoch.": "Este ajuste le permite guardar los pesos del modelo al final de cada época.",
+ "Custom Pretrained": "Preentrenado personalizado",
+ "Utilizing custom pretrained models can lead to superior results, as selecting the most suitable pretrained models tailored to the specific use case can significantly enhance performance.": "El uso de modelos preentrenados personalizados puede conducir a resultados superiores, ya que la selección de los modelos preentrenados más adecuados adaptados al caso de uso específico puede mejorar significativamente el rendimiento.",
+ "Upload Pretrained Model": "Cargar modelo previamente entrenado",
+ "Refresh Custom Pretraineds": "Actualizar preentrenados personalizados",
+ "Pretrained Custom Settings": "Configuración personalizada previamente entrenada",
+ "The file you dropped is not a valid pretrained file. Please try again.": "El archivo que has subido no es un archivo preentrenado válido. Por favor, inténtelo de nuevo.",
+ "Click the refresh button to see the pretrained file in the dropdown menu.": "Haga clic en el botón de actualización para ver el archivo previamente entrenado en el menú desplegable.",
+ "Pretrained G Path": "G preentrenado personalizado",
+ "Pretrained D Path": "D preentrenado personalizado",
+ "GPU Settings": "Configuración de GPU",
+ "Sets advanced GPU settings, recommended for users with better GPU architecture.": "Establece la configuración avanzada del GPU, recomendada para usuarios con una mejor arquitectura de GPU.",
+ "GPU Custom Settings": "Configuración personalizada de la GPU",
+ "GPU Number": "Número de GPU",
+ "0 to ∞ separated by -": "0 a ∞ separados por -",
+ "GPU Information": "Información de GPU",
+ "Pitch Guidance": "Guía de tono",
+ "By employing pitch guidance, it becomes feasible to mirror the intonation of the original voice, including its pitch. This feature is particularly valuable for singing and other scenarios where preserving the original melody or pitch pattern is essential.": "Al emplear la guía de tono, es factible reflejar la entonación de la voz original, incluido su tono. Esta característica es particularmente valiosa para cantar y otros escenarios en los que es esencial preservar la melodía original o el patrón de tono.",
+ "Utilize pretrained models when training your own. This approach reduces training duration and enhances overall quality.": "Utilice modelos preentrenados al entrenar los suyos propios. Este enfoque reduce la duración del entrenamiento y mejora la calidad general.",
+ "Extract": "Extracción",
+ "Extract Features": "Extraer características",
+ "Overtraining Detector": "Detector de Sobreentrenamiento",
+ "Detect overtraining to prevent the model from learning the training data too well and losing the ability to generalize to new data.": "Detectar el sobreentrenamiento para evitar que el modelo aprenda demasiado bien los datos de entrenamiento y pierda la capacidad de generalizar a nuevos datos.",
+ "Overtraining Detector Settings": "Configuraciones del Detector de Sobreentrenamiento",
+ "Overtraining Threshold": "Umbral de Sobreentrenamiento",
+ "Set the maximum number of epochs you want your model to stop training if no improvement is detected.": "Establezca el número máximo de épocas para que su modelo deje de entrenar si no se detecta ninguna mejora.",
+ "Sync Graph": "Sincronizar gráfico",
+ "Synchronize the graph of the tensorbaord. Only enable this setting if you are training a new model.": "Sincronice el gráfico del tensorbaord. Solo active esta configuración si está entrenando un nuevo modelo.",
+ "Start Training": "Empezar a entrenar",
+ "Stop Training & Restart Applio": "Detener el entrenamiento y reiniciar Applio",
+ "Generate Index": "Generar índice",
+ "Export Model": "Exportar modelo",
+ "The button 'Upload' is only for google colab: Uploads the exported files to the ApplioExported folder in your Google Drive.": "El botón 'Subir' es solo para Google Colab: Sube los archivos exportados a la carpeta ApplioExported en tu Google Drive.",
+ "Exported Pth file": "Archivo Pth exportado",
+ "Exported Index file": "Archivo de índice exportado",
+ "Select the pth file to be exported": "Seleccione el archivo pth que se va a exportar",
+ "Select the index file to be exported": "Seleccione el archivo de índice que se va a exportar",
+ "Upload": "Subir",
+ "Voice Model": "Modelo de voz",
+ "Select the voice model to use for the conversion.": "Seleccione el modelo de voz que desea utilizar para la conversión.",
+ "Index File": "Archivo de índice",
+ "Select the index file to use for the conversion.": "Seleccione el archivo de índice que desea utilizar para la conversión.",
+ "Refresh": "Actualizar",
+ "Unload Voice": "Descargar voz",
+ "Single": "Solo",
+ "Upload Audio": "Subir audio",
+ "Select Audio": "Seleccionar audio",
+ "Select the audio to convert.": "Seleccione el audio que desea convertir.",
+ "Advanced Settings": "Ajustes avanzados",
+ "Clear Outputs (Deletes all audios in assets/audios)": "Borrar salidas (elimina todos los audios de los assets/audios)",
+ "Custom Output Path": "Ruta de salida personalizada",
+ "Output Path": "Ruta de salida",
+ "The path where the output audio will be saved, by default in assets/audios/output.wav": "La ruta donde se guardará el audio de salida, de forma predeterminada en assets/audios/output.wav",
+ "Split Audio": "Audio dividido",
+ "Split the audio into chunks for inference to obtain better results in some cases.": "Divida el audio en fragmentos para inferir y obtener mejores resultados en algunos casos.",
+ "Autotune": "Autotune",
+ "Apply a soft autotune to your inferences, recommended for singing conversions.": "Aplique un autotune suave a sus inferencias, recomendado para conversiones de canto.",
+ "Clean Audio": "Audio limpio",
+ "Clean your audio output using noise detection algorithms, recommended for speaking audios.": "Limpie su salida de audio utilizando algoritmos de detección de ruido, recomendados para audios hablados.",
+ "Clean Strength": "Fuerza de limpieza",
+ "Upscale Audio": "Audio de alta calidad",
+ "Upscale the audio to a higher quality, recommended for low-quality audios. (It could take longer to process the audio)": "Mejore la calidad del audio a una calidad superior, recomendado para audios de baja calidad. (Podría tardar más en procesar el audio)",
+ "Set the clean-up level to the audio you want, the more you increase it the more it will clean up, but it is possible that the audio will be more compressed.": "Establezca el nivel de limpieza en el audio que desee, cuanto más lo aumente, más se limpiará, pero es posible que el audio esté más comprimido.",
+ "Pitch": "Tono",
+ "Set the pitch of the audio, the higher the value, the higher the pitch.": "Establezca el tono del audio, cuanto mayor sea el valor, mayor será el tono.",
+ "Filter Radius": "Radio del filtro",
+ "If the number is greater than or equal to three, employing median filtering on the collected tone results has the potential to decrease respiration.": "Si el número es mayor o igual a tres, el empleo de filtrado de mediana en los resultados de tono recopilados tiene el potencial de disminuir la respiración.",
+ "Search Feature Ratio": "Relación de características de búsqueda",
+ "Influence exerted by the index file; a higher value corresponds to greater influence. However, opting for lower values can help mitigate artifacts present in the audio.": "Influencia ejercida por el fichero índice; Un valor más alto corresponde a una mayor influencia. Sin embargo, optar por valores más bajos puede ayudar a mitigar los artefactos presentes en el audio.",
+ "Volume Envelope": "Envoltura de volumen",
+ "Substitute or blend with the volume envelope of the output. The closer the ratio is to 1, the more the output envelope is employed.": "Sustituya o mezcle con la envoltura de volumen de la salida. Cuanto más se acerque la relación a 1, más se empleará la envoltura de salida.",
+ "Protect Voiceless Consonants": "Proteger las consonantes sordas",
+ "Safeguard distinct consonants and breathing sounds to prevent electro-acoustic tearing and other artifacts. Pulling the parameter to its maximum value of 0.5 offers comprehensive protection. However, reducing this value might decrease the extent of protection while potentially mitigating the indexing effect.": "Proteja las distintas consonantes y sonidos respiratorios para evitar desgarros electroacústicos y otros artefactos. Llevar el parámetro a su valor máximo de 0,5 ofrece una protección completa. Sin embargo, la reducción de este valor podría disminuir el alcance de la protección y, al mismo tiempo, mitigar el efecto de indexación.",
+ "Pitch extraction algorithm": "Algoritmo de extracción de tono",
+ "Pitch extraction algorithm to use for the audio conversion. The default algorithm is rmvpe, which is recommended for most cases.": "Algoritmo de extracción de tono que se utilizará para la conversión de audio. El algoritmo predeterminado es rmvpe, que se recomienda para la mayoría de los casos.",
+ "Convert": "Convertir",
+ "Export Audio": "Exportar audio",
+ "Batch": "Lote",
+ "Input Folder": "Carpeta de entrada",
+ "Select the folder containing the audios to convert.": "Seleccione la carpeta que contiene los audios que desea convertir.",
+ "Enter input path": "Introduzca la ruta de entrada",
+ "Output Folder": "Carpeta de salida",
+ "Select the folder where the output audios will be saved.": "Seleccione la carpeta donde se guardarán los audios de salida.",
+ "Enter output path": "Introduzca la ruta de salida",
+ "Get information about the audio": "Obtener información sobre el audio",
+ "Information about the audio file": "Información sobre el archivo de audio",
+ "Waiting for information...": "A la espera de información...",
+ "## Voice Blender": "## Mezclador de voz",
+ "Select two voice models, set your desired blend percentage, and blend them into an entirely new voice.": "Seleccione dos modelos de voz, establezca el porcentaje de mezcla deseado y combínelos en una voz completamente nueva.",
+ "Voice Blender": "Mezclador de voz",
+ "Drag and drop your model here": "Arrastra y suelta tu modelo aquí",
+ "You can also use a custom path.": "También puede utilizar una ruta de acceso personalizada.",
+ "Blend Ratio": "Relación de mezcla",
+ "Adjusting the position more towards one side or the other will make the model more similar to the first or second.": "Ajustar la posición más hacia un lado u otro hará que el modelo se parezca más al primero o al segundo.",
+ "Fusion": "Fusión",
+ "Path to Model": "Ruta de acceso al modelo",
+ "Enter path to model": "Introduzca la ruta al modelo",
+ "Model information to be placed": "Información del modelo que se va a colocar",
+ "Inroduce the model information": "Introduzca la información del modelo",
+ "The information to be placed in the model (You can leave it blank or put anything).": "La información que se va a colocar en el modelo (Puedes dejarlo en blanco o poner cualquier cosa).",
+ "View model information": "Ver información del modelo",
+ "Introduce the model pth path": "Introducción de la ruta pth del modelo",
+ "View": "Vista",
+ "Model extraction": "Extracción de modelos",
+ "Model conversion": "Conversión de modelos",
+ "Pth file": "Archivo Pth",
+ "Output of the pth file": "Salida del fichero pth",
+ "# How to Report an Issue on GitHub": "# Cómo reportar un problema en GitHub",
+ "1. Click on the 'Record Screen' button below to start recording the issue you are experiencing.": "1. Haga clic en el botón 'Grabar pantalla' a continuación para comenzar a grabar el problema que está experimentando.",
+ "2. Once you have finished recording the issue, click on the 'Stop Recording' button (the same button, but the label changes depending on whether you are actively recording or not).": "2. Una vez que haya terminado de grabar el problema, haga clic en el botón 'Detener grabación' (el mismo botón, pero la etiqueta cambia dependiendo de si está grabando activamente o no).",
+ "3. Go to [GitHub Issues](https://github.com/IAHispano/Applio/issues) and click on the 'New Issue' button.": "3. Vaya a [Problemas de GitHub](https://github.com/IAHispano/Applio/issues) y haga clic en el botón 'Nuevo problema'.",
+ "4. Complete the provided issue template, ensuring to include details as needed, and utilize the assets section to upload the recorded file from the previous step.": "4. Complete la plantilla de problema proporcionada, asegurándose de incluir los detalles según sea necesario, y utilice la sección de activos para cargar el archivo grabado del paso anterior.",
+ "Record Screen": "Grabar pantalla",
+ "Record": "Grabar",
+ "Stop Recording": "Detener la grabación",
+ "Introduce the model .pth path": "Introducción de la ruta de acceso .pth del modelo",
+ "See Model Information": "Ver información del modelo",
+ "## Download Model": "## Descargar modelo",
+ "Model Link": "Enlace de modelo",
+ "Introduce the model link": "Introducir el enlace del modelo",
+ "Download Model": "Descargar modelo",
+ "## Drop files": "## Soltar archivos",
+ "Drag your .pth file and .index file into this space. Drag one and then the other.": "Arrastre el archivo .pth y el archivo .index a este espacio. Arrastre uno y luego el otro.",
+ "## Search Model": "## Buscar modelo",
+ "Search": "Buscar",
+ "Introduce the model name to search.": "Introduzca el nombre del modelo a buscar.",
+ "We couldn't find models by that name.": "No pudimos encontrar modelos con ese nombre.",
+ "## Download Pretrained Models": "## Descargar modelos preentrenados",
+ "Select the pretrained model you want to download.": "Seleccione el modelo preentrenado que desea descargar.",
+ "And select the sampling rate.": "Y seleccione la frecuencia de muestreo.",
+ "TTS Voices": "Voces TTS",
+ "TTS Speed": "Velocidad TTS",
+ "Increase or decrease TTS speed": "Aumentar o disminuir la velocidad de TTS",
+ "Select the TTS voice to use for the conversion.": "Seleccione la voz TTS que se utilizará para la conversión.",
+ "Text to Synthesize": "Texto para sintetizar",
+ "Enter the text to synthesize.": "Introduzca el texto que desea sintetizar.",
+ "Or you can upload a .txt file": "O bien, puede cargar un archivo .txt",
+ "Enter text to synthesize": "Introduzca el texto que desea sintetizar",
+ "Output Path for TTS Audio": "Ruta de salida para audio TTS",
+ "Output Path for RVC Audio": "Ruta de salida para audio RVC",
+ "Enable Applio integration with Discord presence": "Habilitar la integración de Applio con la presencia de Discord",
+ "It will activate the possibility of displaying the current Applio activity in Discord.": "Activará la posibilidad de mostrar la actividad actual de Applio en Discord.",
+ "Enable Applio integration with applio.org/models using flask": "Habilite la integración de Applio con applio.org/models usando flask",
+ "It will activate the possibility of downloading models with a click from the website.": "Activará la posibilidad de descargar modelos con un clic desde el sitio web.",
+ "Enable fake GPU": "Habilitar GPU falsa",
+ "Training is currently unsupported due to the absence of a GPU. To activate the training tab, navigate to the settings tab and enable the 'Fake GPU' option.": "Actualmente no se admite el entrenamiento debido a la ausencia de una GPU. Para activar la pestaña de entrenamiento, vaya a la pestaña de configuración y habilite la opción 'GPU falsa'.",
+ "Activates the train tab. However, please note that this device lacks GPU capabilities, hence training is not supported. This option is only for testing purposes. (This option will restart Applio)": "Activa la pestaña de entrenamiento. Sin embargo, tenga en cuenta que este dispositivo carece de capacidades de GPU, por lo que no se admite el entrenamiento. Esta opción es solo para fines de prueba. (Esta opción reiniciará Applio)",
+ "Theme": "Tema",
+ "Select the theme you want to use. (Requires restarting Applio)": "Seleccione el tema que desea utilizar. (Requiere reiniciar Applio)",
+ "Language": "Idioma",
+ "Select the language you want to use. (Requires restarting Applio)": "Seleccione el idioma que desea utilizar. (Requiere reiniciar Applio)",
+ "Plugin Installer": "Instalador de complementos",
+ "Drag your plugin.zip to install it": "Arrastra tu plugin.zip para instalarlo",
+ "Version Checker": "Comprobador de versiones",
+ "Check which version of Applio is the latest to see if you need to update.": "Compruebe qué versión de Applio es la más reciente para ver si necesita actualizar.",
+ "Check for updates": "Buscar actualizaciones"
+}
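
Since lookup is by exact string, a locale key that drifts from its English source (a gained or lost trailing period, a typo fixed on only one side) silently falls back to English at runtime. A hypothetical consistency check, assuming the English reference file is `assets/i18n/languages/en_US.json`:

```python
import json
import sys


def check_locale(locale_path, reference_path="assets/i18n/languages/en_US.json"):
    """Hypothetical helper: flag keys that don't match the reference file."""
    with open(reference_path, encoding="utf-8") as f:
        reference = set(json.load(f))
    with open(locale_path, encoding="utf-8") as f:
        locale = set(json.load(f))

    stale = sorted(locale - reference)    # keys the UI will never look up
    missing = sorted(reference - locale)  # strings that will fall back to English
    print(f"{locale_path}: {len(stale)} stale, {len(missing)} missing")
    for key in stale:
        print(f"  stale: {key!r}")
    return not stale


if __name__ == "__main__":
    sys.exit(0 if check_locale(sys.argv[1]) else 1)
```
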
diff --git a/assets/i18n/languages/fa_FA.json b/assets/i18n/languages/fa_FA.json
new file mode 100644
index 0000000000000000000000000000000000000000..3a37d5eb1df663cf1bd1f9b43df6eddf36241e99
--- /dev/null
+++ b/assets/i18n/languages/fa_FA.json
@@ -0,0 +1,175 @@
+{
+ "VITS-based Voice Conversion focused on simplicity, quality and performance.": "ابزار شبیه سازی صدا نهایی، با دقت برای قدرت بی نظیر، مدولار بودن و تجربه کاربر پسند بهینه شده است.",
+ "This section contains some extra utilities that often may be in experimental phases.": "این بخش شامل برخی از ابزارهای اضافی است که اغلب ممکن است در مراحل ازمایشی باشد.",
+ "Output Information": "اطلاعات خروجی",
+ "The output information will be displayed here.": "اطلاعات خروجی در اینجا نمایش داده خواهد شد.",
+ "Inference": "استنباط",
+ "Train": "قطار",
+ "Extra": "اضافی",
+ "Merge Audios": "ادغام Audios",
+ "Processing": "پردازش",
+ "Audio Analyzer": "انالایزر صوتی",
+ "Model Information": "اطلاعات مدل",
+ "Plugins": "پلاگین",
+ "Download": "دانلود",
+ "Report a Bug": "گزارش یک باگ",
+ "Settings": "تنظیمات",
+ "Preprocess": "پیش پردازش",
+ "Model Name": "نام مدل",
+ "Name of the new model.": "نام مدل جدید",
+ "Enter model name": "نام مدل را وارد کنید",
+ "Dataset Path": "مسیر مجموعه داده",
+ "Path to the dataset folder.": "مسیر به پوشه مجموعه داده ها.",
+ "Refresh Datasets": "بازاوری مجموعه داده ها",
+ "Dataset Creator": "سازنده مجموعه داده",
+ "Dataset Name": "نام مجموعه داده",
+ "Name of the new dataset.": "نام مجموعه داده های جدید",
+ "Enter dataset name": "وارد کردن نام مجموعه داده ها",
+ "Upload Audio Dataset": "اپلود مجموعه داده های صوتی",
+ "The audio file has been successfully added to the dataset. Please click the preprocess button.": "فایل صوتی با موفقیت به مجموعه داده ها اضافه شده است. لطفا دکمه پیش پردازش را فشار دهید.",
+ "Enter dataset path": "وارد کردن مسیر مجموعه داده ها",
+ "Sampling Rate": "نرخ نمونه برداری",
+ "The sampling rate of the audio files.": "نرخ نمونه برداری از فایل های صوتی.",
+ "Model Architecture": "نسخه RVC",
+ "Version of the model architecture.": "نسخه RVC از مدل.",
+ "Preprocess Dataset": "مجموعه داده پیش پردازش",
+ "Extract": "عصاره",
+ "Hop Length": "طول هاپ",
+ "Denotes the duration it takes for the system to transition to a significant pitch change. Smaller hop lengths require more time for inference but tend to yield higher pitch accuracy.": "نشان دهنده مدت زمان لازم برای سیستم برای انتقال به یک تغییر قابل توجه است. طول هاپ کوچکتر نیاز به زمان بیشتری برای استنتاج دارد اما تمایل به عملکرد دقت بالاتر دارد.",
+ "Batch Size": "اندازه دسته",
+ "It's advisable to align it with the available VRAM of your GPU. A setting of 4 offers improved accuracy but slower processing, while 8 provides faster and standard results.": "توصیه می شود ان را با VRAM موجود GPU خود هماهنگ کنید. تنظیمات 4 ارائه می دهد دقت بهبود یافته اما پردازش کندتر، در حالی که 8 نتایج سریع تر و استاندارد را فراهم می کند.",
+ "Save Every Epoch": "ذخیره هر عصر",
+ "Determine at how many epochs the model will saved at.": "تعیین کنید که مدل در چند دوره ذخیره خواهد شد.",
+ "Total Epoch": "کل اپک",
+ "Specifies the overall quantity of epochs for the model training process.": "مقدار کلی دوره ها را برای فرایند اموزش مدل مشخص می کند.",
+ "Pretrained": "پیش اموزش دیده",
+ "Save Only Latest": "ذخیره فقط اخرین",
+ "Enabling this setting will result in the G and D files saving only their most recent versions, effectively conserving storage space.": "فعال کردن این تنظیم منجر به صرفه جویی در فایل های G و D تنها نسخه های اخیر انها می شود و به طور موثر فضای ذخیره سازی را حفظ می کند.",
+ "Save Every Weights": "صرفه جویی در هر وزن",
+ "This setting enables you to save the weights of the model at the conclusion of each epoch.": "این تنظیم شما را قادر به صرفه جویی در وزن مدل در پایان هر دوره می کند.",
+ "Custom Pretrained": "سفارشی پیش اموزش دیده",
+ "Utilizing custom pretrained models can lead to superior results, as selecting the most suitable pretrained models tailored to the specific use case can significantly enhance performance.": "استفاده از مدل های از پیش اموزش دیده سفارشی می تواند منجر به نتایج برتر شود، زیرا انتخاب مناسب ترین مدل های از پیش اموزش دیده متناسب با مورد استفاده خاص می تواند به طور قابل توجهی عملکرد را افزایش دهد.",
+ "Upload Pretrained Model": "اپلود مدل از پیش اموزش دیده",
+ "Refresh Custom Pretraineds": "تازه کردن Pretraineds سفارشی",
+ "Pretrained Custom Settings": "تنظیمات سفارشی از پیش اموزش داده شده",
+ "The file you dropped is not a valid pretrained file. Please try again.": "پرونده ای که حذف کرده اید یک پرونده از پیش اموزش داده شده معتبر نیست. لطفا دوباره تلاش کنید.",
+ "Click the refresh button to see the pretrained file in the dropdown menu.": "با کلیک بر روی دکمه تازه کردن برای دیدن فایل از پیش اموزش دیده در منوی کشویی.",
+ "Pretrained G Path": "سفارشی پیش اموزش G",
+ "Pretrained D Path": "سفارشی از پیش اموزش دیده D",
+ "GPU Settings": "تنظیمات GPU",
+ "Sets advanced GPU settings, recommended for users with better GPU architecture.": "تنظیمات پیشرفته GPU را تنظیم می کند که برای کاربران با معماری GPU بهتر توصیه می شود.",
+ "GPU Custom Settings": "تنظیمات سفارشی GPU",
+ "GPU Number": "شماره GPU",
+ "0 to ∞ separated by -": "0 به ∞ جدا شده توسط -",
+ "GPU Information": "اطلاعات GPU",
+ "Pitch Guidance": "راهنمای زمین",
+ "By employing pitch guidance, it becomes feasible to mirror the intonation of the original voice, including its pitch. This feature is particularly valuable for singing and other scenarios where preserving the original melody or pitch pattern is essential.": "با استفاده از راهنمایی زمین، امکان پذیر است که زیر و بمی صدا اصلی، از جمله زمین ان را منعکس کند. این ویژگی به ویژه برای اواز خواندن و سناریوهای دیگر که در ان حفظ ملودی اصلی یا الگوی زمین ضروری است، ارزشمند است.",
+ "Utilize pretrained models when training your own. This approach reduces training duration and enhances overall quality.": "از مدل های از پیش اموزش دیده در هنگام اموزش خود استفاده کنید. این رویکرد مدت زمان اموزش را کاهش می دهد و کیفیت کلی را افزایش می دهد.",
+ "Extract Features": "استخراج ویژگی ها",
+ "Start Training": "شروع اموزش",
+ "Generate Index": "ایجاد نمایه",
+ "Voice Model": "مدل صوتی",
+ "Select the voice model to use for the conversion.": "مدل صوتی مورد استفاده برای تبدیل را انتخاب کنید.",
+ "Index File": "پروندۀ نمایه",
+ "Select the index file to use for the conversion.": "انتخاب فایل شاخص برای استفاده برای تبدیل.",
+ "Refresh": "نوسازی",
+ "Unload Voice": "بارگیری صدا",
+ "Single": "تک",
+ "Upload Audio": "بارگذاری صدا",
+ "Select Audio": "انتخاب صدا",
+ "Select the audio to convert.": "صدا را برای تبدیل انتخاب کنید.",
+ "Advanced Settings": "تنظیمات پیشرفته",
+ "Clear Outputs (Deletes all audios in assets/audios)": "پاک کردن خروجی ها (حذف تمام فایل های صوتی در دارایی ها / audios)",
+ "Custom Output Path": "مسیر خروجی سفارشی",
+ "Output Path": "مسیر خروجی",
+ "The path where the output audio will be saved, by default in assets/audios/output.wav": "مسیری که در ان صدای خروجی ذخیره می شود، به طور پیش فرض در assets/audios/output.wav",
+ "Split Audio": "تقسیم صوتی",
+ "Split the audio into chunks for inference to obtain better results in some cases.": "صدا را به تکه های تقسیم کنید تا استنتاج شود تا در برخی موارد نتایج بهتری کسب شود.",
+ "Autotune": "Autotune",
+ "Apply a soft autotune to your inferences, recommended for singing conversions.": "درخواست autotune نرم به استنتاج خود را، توصیه می شود برای تبدیل اواز.",
+ "Clean Audio": "پاک صوتی",
+ "Clean your audio output using noise detection algorithms, recommended for speaking audios.": "خروجی صوتی خود را با استفاده از الگوریتم های تشخیص سر و صدا تمیز کنید، توصیه شده برای صحبت کردن صوتی.",
+ "Clean Strength": "قدرت پاک",
+ "Set the clean-up level to the audio you want, the more you increase it the more it will clean up, but it is possible that the audio will be more compressed.": "سطح پاکسازی را به صوتی که می خواهید تنظیم کنید، هرچه بیشتر ان را افزایش دهید، بیشتر تمیز می شود، اما ممکن است صدا فشرده تر شود.",
+ "Pitch": "زمین",
+ "Set the pitch of the audio, the higher the value, the higher the pitch.": "زمین صدا را تنظیم کنید، هر چه مقدار بالاتر باشد، زمین بالاتر است.",
+ "Filter Radius": "شعاع پالا",
+ "If the number is greater than or equal to three, employing median filtering on the collected tone results has the potential to decrease respiration.": "اگر عدد بزرگتر یا مساوی با سه باشد، استفاده از فیلتر متوسط در نتایج تن جمع اوری شده پتانسیل کاهش تنفس را دارد.",
+ "Search Feature Ratio": "نسبت ویژگی جستجو",
+ "Influence exerted by the index file; a higher value corresponds to greater influence. However, opting for lower values can help mitigate artifacts present in the audio.": "تاثیر اعمال شده توسط فایل شاخص؛ ارزش بالاتر مربوط به نفوذ بیشتر است. با این حال، انتخاب مقادیر پایین تر می تواند به کاهش مصنوعات موجود در صدا کمک کند.",
+ "Volume Envelope": "پاکت جلد",
+ "Substitute or blend with the volume envelope of the output. The closer the ratio is to 1, the more the output envelope is employed.": "جایگزین یا ترکیب با پاکت حجم خروجی. هر چه نسبت به 1 نزدیک تر باشد، پاکت خروجی بیشتر استفاده می شود.",
+ "Protect Voiceless Consonants": "محافظت از صامت های بی صدا",
+ "Safeguard distinct consonants and breathing sounds to prevent electro-acoustic tearing and other artifacts. Pulling the parameter to its maximum value of 0.5 offers comprehensive protection. However, reducing this value might decrease the extent of protection while potentially mitigating the indexing effect.": "حفاظت از صامت های متمایز و صداهای تنفسی برای جلوگیری از پارگی الکترو اکوستیک و سایر مصنوعات. کشیدن پارامتر به حداکثر مقدار ان از 0.5 ارائه می دهد حفاظت جامع. با این حال، کاهش این مقدار ممکن است میزان حفاظت را کاهش دهد در حالی که به طور بالقوه اثر نمایه سازی را کاهش می دهد.",
+ "Pitch extraction algorithm": "الگوریتم استخراج زمین",
+ "Pitch extraction algorithm to use for the audio conversion. The default algorithm is rmvpe, which is recommended for most cases.": "الگوریتم استخراج زمین برای استفاده برای تبدیل صوتی. الگوریتم پیش فرض rmvpe است که برای اکثر موارد توصیه می شود.",
+ "Convert": "تبدیل",
+ "Export Audio": "صادرات صدا",
+ "Batch": "دسته",
+ "Input Folder": "پوشه ورودی",
+ "Select the folder containing the audios to convert.": "پوشه حاوی فایل های صوتی را برای تبدیل انتخاب کنید.",
+ "Enter input path": "وارد کردن مسیر ورودی",
+ "Output Folder": "پوشۀ خروجی",
+ "Select the folder where the output audios will be saved.": "پوشه ای را انتخاب کنید که صدای خروجی در ان ذخیره می شود.",
+ "Enter output path": "وارد کردن مسیر خروجی",
+ "Get information about the audio": "دریافت اطلاعات در مورد صدا",
+ "Information about the audio file": "اطلاعات مربوط به فایل صوتی",
+ "Waiting for information...": "در انتظار اطلاعات...",
+ "## Voice Blender": "## بلندر صوتی",
+ "Select two voice models, set your desired blend percentage, and blend them into an entirely new voice.": "دو مدل صوتی را انتخاب کنید، درصد ترکیب مورد نظر خود را تنظیم کنید و انها را به یک صدای کاملا جدید ترکیب کنید.",
+ "Voice Blender": "بلندر صوتی",
+ "Drag and drop your model here": "کشیدن و رها کردن مدل خود را در اینجا",
+ "You can also use a custom path.": "شما همچنین می توانید از یک مسیر سفارشی استفاده کنید.",
+ "Blend Ratio": "نسبت مخلوط",
+ "Adjusting the position more towards one side or the other will make the model more similar to the first or second.": "تنظیم موقعیت بیشتر به سمت یک طرف یا طرف دیگر، مدل را بیشتر شبیه به اول یا دوم می کند.",
+ "Fusion": "همجوشی",
+ "Path to Model": "مسیر به مدل",
+ "Enter path to model": "وارد کردن مسیر به مدل",
+ "Model information to be placed": "اطلاعات مدل قرار داده می شود",
+ "Inroduce the model information": "Inroduce اطلاعات مدل",
+ "The information to be placed in the model (You can leave it blank or put anything).": "اطلاعاتی که باید در مدل قرار داده شود (شما می توانید ان را خالی بگذارید یا هر چیزی را قرار دهید).",
+ "View model information": "مشاهده اطلاعات مدل",
+ "Introduce the model pth path": "معرفی مسیر PTH مدل",
+ "View": "مشاهده",
+ "Model extraction": "استخراج مدل",
+ "Model conversion": "تبدیل مدل",
+ "Pth file": "پرونده Pth",
+ "Output of the pth file": "خروجی پروندۀ pth",
+ "# How to Report an Issue on GitHub": "# چگونه یک مشکل را در GitHub گزارش دهیم",
+ "1. Click on the 'Record Screen' button below to start recording the issue you are experiencing.": "1. با کلیک بر روی 'ضبط صفحه نمایش' دکمه زیر برای شروع ضبط مسئله شما در حال تجربه.",
+ "2. Once you have finished recording the issue, click on the 'Stop Recording' button (the same button, but the label changes depending on whether you are actively recording or not).": "2. هنگامی که ضبط مسئله را به پایان رساندید، بر روی دکمه \"توقف ضبط\" کلیک کنید (همان دکمه، اما برچسب بسته به اینکه ایا شما به طور فعال ضبط می کنید یا نه تغییر می کند).",
+ "3. Go to [GitHub Issues](https://github.com/IAHispano/Applio/issues) and click on the 'New Issue' button.": "3. به [GitHub Issues] (https://github.com/IAHispano/Applio/issues) بروید و بر روی دکمه \"New Issue\" کلیک کنید.",
+ "4. Complete the provided issue template, ensuring to include details as needed, and utilize the assets section to upload the recorded file from the previous step.": "4. تکمیل قالب موضوع ارائه شده، اطمینان حاصل شود که شامل جزئیات در صورت نیاز، و استفاده از بخش دارایی برای اپلود فایل ضبط شده از مرحله قبلی.",
+ "Record Screen": "صفحه ضبط",
+ "Record": "رکورد",
+ "Stop Recording": "توقف ضبط",
+ "Introduce the model .pth path": "معرفی مسیر .pth مدل",
+ "See Model Information": "مشاهده اطلاعات مدل",
+ "## Download Model": "## دانلود مدل",
+ "Model Link": "پیوند مدل",
+ "Introduce the model link": "معرفی لینک مدل",
+ "Download Model": "دانلود مدل",
+ "## Drop files": "## رها کردن پروندهها",
+ "Drag your .pth file and .index file into this space. Drag one and then the other.": "فایل .pth و .index خود را به این فضا بکشید. یکی را بکشید و سپس دیگری را بکشید.",
+ "TTS Voices": "TTS صداها",
+ "Select the TTS voice to use for the conversion.": "صدای TTS را برای استفاده برای تبدیل انتخاب کنید.",
+ "Text to Synthesize": "متن برای سنتز",
+ "Enter the text to synthesize.": "متن را برای ترکیب وارد کنید.",
+ "Or you can upload a .txt file": "یا شما می توانید یک فایل .txt اپلود کنید",
+ "Enter text to synthesize": "متن را برای سنتز وارد کنید",
+ "Output Path for TTS Audio": "مسیر خروجی برای صدای TTS",
+ "Output Path for RVC Audio": "مسیر خروجی برای RVC Audio",
+ "Enable Applio integration with Discord presence": "ادغام Applio را با حضور Discord فعال کنید",
+ "It will activate the possibility of displaying the current Applio activity in Discord.": "این امکان نمایش فعالیت Applio فعلی در Discord را فعال خواهد کرد.",
+ "Enable Applio integration with applio.org/models using flask": "فعال کردن ادغام Applio با applio.org/models با استفاده از فلاسک",
+ "It will activate the possibility of downloading models with a click from the website.": "این امکان دانلود مدل ها را با یک کلیک از وب سایت فعال می کند.",
+ "Theme": "تم",
+ "Select the theme you want to use. (Requires restarting Applio)": "زمینه ای را که می خواهید استفاده کنید انتخاب کنید. (نیاز به راه اندازی مجدد Applio)",
+ "Language": "زبان",
+ "Select the language you want to use. (Requires restarting Applio)": "زبانی را که می خواهید استفاده کنید انتخاب کنید. (نیاز به راه اندازی مجدد Applio)",
+ "Plugin Installer": "نصب افزونه",
+ "Drag your plugin.zip to install it": "plugin.zip خود را بکشید تا ان را نصب کنید",
+ "Version Checker": "بررسی نسخه",
+ "Check which version of Applio is the latest to see if you need to update.": "بررسی کنید که کدام نسخه از Applio اخرین است تا ببینید ایا شما نیاز به به روز رسانی دارید.",
+ "Check for updates": "بررسی برای به روز رسانی"
+}
\ No newline at end of file
diff --git a/assets/i18n/languages/fr_FR.json b/assets/i18n/languages/fr_FR.json
new file mode 100644
index 0000000000000000000000000000000000000000..107f92f8e12bfca14e5e0d19b6c29a47ba0f8fe8
--- /dev/null
+++ b/assets/i18n/languages/fr_FR.json
@@ -0,0 +1,230 @@
+{
+ "VITS-based Voice Conversion focused on simplicity, quality and performance.": "Outil ultime de clonage vocal, méticuleusement optimisé pour une puissance, une modularité et une expérience conviviales inégalées.",
+ "This section contains some extra utilities that often may be in experimental phases.": "Cette section contient quelques utilitaires supplémentaires qui peuvent souvent être en phase expérimentale.",
+ "Output Information": "Informations de sortie",
+ "The output information will be displayed here.": "Les informations de sortie seront affichées ici.",
+
+ "Inference": "Inférence",
+ "Train": "Train",
+ "Extra": "Supplémentaire",
+ "Merge Audios": "Fusionner les audios",
+ "Processing": "Traitement",
+ "Audio Analyzer": "Analyseur audio",
+ "Model Information": "Informations sur le modèle",
+ "Plugins": "Plugins (Plugins)",
+ "Download": "Télécharger",
+ "Report a Bug": "Signaler un bogue",
+ "Settings": "Paramètres",
+
+ "Preprocess": "Pré-traitement",
+ "Model Name": "Nom du modèle",
+ "Name of the new model.": "Nom du nouveau modèle.",
+ "Enter model name": "Entrez le nom du modèle",
+ "Dataset Path": "Chemin d’accès au jeu de données",
+ "Path to the dataset folder.": "Chemin d’accès au dossier du jeu de données.",
+ "Refresh Datasets": "Actualiser les jeux de données",
+ "Dataset Creator": "Créateur de jeux de données",
+ "Dataset Name": "Nom du jeu de données",
+ "Name of the new dataset.": "Nom du nouveau jeu de données.",
+ "Enter dataset name": "Entrez le nom du jeu de données",
+ "Upload Audio Dataset": "Envoyer un jeu de données audio",
+ "The audio file has been successfully added to the dataset. Please click the preprocess button.": "Le fichier audio a été ajouté avec succès au jeu de données. Veuillez cliquer sur le bouton de prétraitement.",
+ "Enter dataset path": "Entrez le chemin d’accès au jeu de données",
+ "Sampling Rate": "Fréquence d’échantillonnage",
+ "The sampling rate of the audio files.": "Fréquence d’échantillonnage des fichiers audio.",
+ "Model Architecture": "Model Architecture",
+ "Version of the model architecture.": "La version RVC du modèle.",
+ "Preprocess Dataset": "Jeu de données de prétraitement",
+ "Extract": "Extraire",
+ "Hop Length": "Longueur du houblon",
+ "Denotes the duration it takes for the system to transition to a significant pitch change. Smaller hop lengths require more time for inference but tend to yield higher pitch accuracy.": "Indique le temps qu’il faut au système pour passer à un changement de hauteur significatif. Les longueurs de saut plus petites nécessitent plus de temps pour l’inférence, mais ont tendance à donner une plus grande précision de hauteur.",
+ "Model used for learning speaker embedding.": "Modèle utilisé pour apprendre l’intégration des haut-parleurs.",
+ "Batch Size": "Taille du lot",
+
+ "Embedder Model": "Embedder Model",
+ "It's advisable to align it with the available VRAM of your GPU. A setting of 4 offers improved accuracy but slower processing, while 8 provides faster and standard results.": "Il est conseillé de l’aligner sur la VRAM disponible de votre GPU. Un réglage de 4 offre une précision améliorée mais un traitement plus lent, tandis que 8 fournit des résultats plus rapides et standard.",
+ "Save Every Epoch": "Sauvez toutes les époques",
+ "Determine at how many epochs the model will saved at.": "Déterminez à combien d’époques le modèle sera enregistré.",
+ "Total Epoch": "Époque totale",
+ "Specifies the overall quantity of epochs for the model training process.": "Spécifie la quantité globale d’époques pour le processus d’entraînement du modèle.",
+ "Pretrained": "Pré-entraîné",
+ "Save Only Latest": "Enregistrer uniquement les plus récents",
+ "Enabling this setting will result in the G and D files saving only their most recent versions, effectively conserving storage space.": "L’activation de ce paramètre permet aux fichiers G et D d’enregistrer uniquement leurs versions les plus récentes, ce qui permet d’économiser efficacement de l’espace de stockage.",
+ "Save Every Weights": "Économisez tous les poids",
+ "This setting enables you to save the weights of the model at the conclusion of each epoch.": "Ce paramètre vous permet d’enregistrer les poids du modèle à la fin de chaque époque.",
+ "Custom Pretrained": "Pré-entraîné sur mesure",
+ "Utilizing custom pretrained models can lead to superior results, as selecting the most suitable pretrained models tailored to the specific use case can significantly enhance performance.": "L’utilisation de modèles préentraînés personnalisés peut conduire à des résultats supérieurs, car la sélection des modèles préentraînés les plus appropriés et adaptés au cas d’utilisation spécifique peut améliorer considérablement les performances.",
+ "Upload Pretrained Model": "Envoyer un modèle pré-entraîné",
+ "Refresh Custom Pretraineds": "Actualiser les pré-entraînés personnalisés",
+ "Pretrained Custom Settings": "Paramètres personnalisés préentraînés",
+ "The file you dropped is not a valid pretrained file. Please try again.": "Le fichier que vous avez déposé n’est pas un fichier pré-entraîné valide. Veuillez réessayer.",
+ "Click the refresh button to see the pretrained file in the dropdown menu.": "Cliquez sur le bouton d’actualisation pour afficher le fichier préentraîné dans le menu déroulant.",
+ "Pretrained G Path": "G pré-entraîné personnalisé",
+ "Pretrained D Path": "D pré-entraîné personnalisé",
+ "GPU Settings": "Paramètres GPU",
+ "Sets advanced GPU settings, recommended for users with better GPU architecture.": "Définit les paramètres GPU avancés, recommandés pour les utilisateurs disposant d’une meilleure architecture GPU.",
+ "GPU Custom Settings": "Paramètres personnalisés du GPU",
+ "GPU Number": "Numéro de GPU",
+ "0 to ∞ separated by -": "0 à ∞ séparés par -",
+ "GPU Information": "Informations sur le GPU",
+ "Pitch Guidance": "Guidage de la hauteur",
+ "By employing pitch guidance, it becomes feasible to mirror the intonation of the original voice, including its pitch. This feature is particularly valuable for singing and other scenarios where preserving the original melody or pitch pattern is essential.": "En utilisant le guidage de hauteur, il devient possible de refléter l’intonation de la voix d’origine, y compris sa hauteur. Cette fonctionnalité est particulièrement utile pour le chant et d’autres scénarios où la préservation de la mélodie ou du modèle de hauteur d’origine est essentielle.",
+ "Utilize pretrained models when training your own. This approach reduces training duration and enhances overall quality.": "Utilisez des modèles pré-entraînés lors de l’entraînement des vôtres. Cette approche permet de réduire la durée de la formation et d’améliorer la qualité globale.",
+ "Extract Features": "Extraire des caractéristiques",
+ "Overtraining Detector": "Détecteur de surentraînement",
+ "Detect overtraining to prevent the model from learning the training data too well and losing the ability to generalize to new data.": "Détectez le surentraînement pour éviter que le modèle n'apprenne trop bien les données d'entraînement et ne perde la capacité de généraliser à de nouvelles données.",
+ "Overtraining Detector Settings": "Paramètres du détecteur de surentraînement",
+ "Overtraining Threshold": "Seuil de surentraînement",
+ "Set the maximum number of epochs you want your model to stop training if no improvement is detected.": "Définissez le nombre maximal d’époques pendant lesquelles vous souhaitez que votre modèle arrête l’entraînement si aucune amélioration n’est détectée.",
+ "Sync Graph": "Graphique de synchronisation",
+ "Synchronize the graph of the tensorbaord. Only enable this setting if you are training a new model.": "Synchronisez le graphique du tensorbaord. Activez ce paramètre uniquement si vous entraînez un nouveau modèle.",
+
+ "Start Training": "Commencer l’entraînement",
+ "Stop Training & Restart Applio": "Arrêter l’entraînement et redémarrer Applio",
+ "Generate Index": "Générer un index",
+
+ "Export Model": "Modèle d’exportation",
+ "The button 'Upload' is only for google colab: Uploads the exported files to the ApplioExported folder in your Google Drive.": "Le bouton « Upload » est uniquement destiné à Google Colab : upload les fichiers exportés dans le dossier ApplioExported de votre Google Drive.",
+ "Exported Pth file": "Fichier Pth exporté",
+ "Exported Index file": "Fichier d’index exporté",
+ "Select the pth file to be exported": "Sélectionnez le fichier pth à exporter",
+ "Select the index file to be exported": "Sélectionnez le fichier d’index à exporter",
+ "Upload": "Envoyer",
+
+ "Voice Model": "Modèle vocal",
+ "Select the voice model to use for the conversion.": "Sélectionnez le modèle vocal à utiliser pour la conversion.",
+ "Index File": "Fichier d’index",
+ "Select the index file to use for the conversion.": "Sélectionnez le fichier d’index à utiliser pour la conversion.",
+ "Refresh": "Rafraîchir",
+ "Unload Voice": "Décharger la voix",
+
+ "Single": "Seul",
+ "Upload Audio": "Envoyer l’audio",
+ "Select Audio": "Sélectionnez Audio (Audio)",
+ "Select the audio to convert.": "Sélectionnez l’audio à convertir.",
+ "Advanced Settings": "Paramètres avancés",
+ "Clear Outputs (Deletes all audios in assets/audios)": "Effacer les sorties (supprime tous les audios dans les ressources/audios)",
+ "Custom Output Path": "Chemin de sortie personnalisé",
+ "Output Path": "Chemin de sortie",
+ "The path where the output audio will be saved, by default in assets/audios/output.wav": "Le chemin où l’audio de sortie sera enregistré, par défaut dans assets/audios/output.wav",
+ "Split Audio": "Diviser l’audio",
+ "Split the audio into chunks for inference to obtain better results in some cases.": "Divisez l’audio en morceaux pour l’inférence afin d’obtenir de meilleurs résultats dans certains cas.",
+ "Autotune": "Réglage automatique",
+ "Apply a soft autotune to your inferences, recommended for singing conversions.": "Appliquez un réglage automatique doux à vos inférences, recommandé pour les conversions de chant.",
+ "Clean Audio": "Audio clair",
+ "Clean your audio output using noise detection algorithms, recommended for speaking audios.": "Nettoyez votre sortie audio à l’aide d’algorithmes de détection de bruit, recommandés pour les audios parlants.",
+ "Clean Strength": "Force propre",
+ "Upscale Audio": "Audio haut de gamme",
+ "Upscale the audio to a higher quality, recommended for low-quality audios. (It could take longer to process the audio)": "Améliorez l’audio vers une qualité supérieure, recommandé pour les audios de faible qualité. (Le traitement de l’audio pourrait prendre plus de temps)",
+ "Set the clean-up level to the audio you want, the more you increase it the more it will clean up, but it is possible that the audio will be more compressed.": "Réglez le niveau de nettoyage sur l’audio que vous souhaitez, plus vous l’augmentez plus il nettoiera, mais il est possible que l’audio soit plus compressé.",
+ "Pitch": "Tanguer",
+ "Set the pitch of the audio, the higher the value, the higher the pitch.": "Réglez la hauteur de l’audio, plus la valeur est élevée, plus la hauteur est élevée.",
+ "Filter Radius": "Rayon du filtre",
+ "If the number is greater than or equal to three, employing median filtering on the collected tone results has the potential to decrease respiration.": "Si le nombre est supérieur ou égal à trois, l’utilisation d’un filtrage médian sur les résultats de tonalité recueillis a le potentiel de diminuer la respiration.",
+ "Search Feature Ratio": "Rapport de caractéristiques de recherche",
+ "Influence exerted by the index file; a higher value corresponds to greater influence. However, opting for lower values can help mitigate artifacts present in the audio.": "Influence exercée par le fichier d’index ; Une valeur plus élevée correspond à une plus grande influence. Cependant, opter pour des valeurs plus faibles peut aider à atténuer les artefacts présents dans l’audio.",
+ "Volume Envelope": "Enveloppe de volume",
+ "Substitute or blend with the volume envelope of the output. The closer the ratio is to 1, the more the output envelope is employed.": "Remplacez ou fusionnez avec l’enveloppe de volume de la sortie. Plus le rapport est proche de 1, plus l’enveloppe de sortie est utilisée.",
+ "Protect Voiceless Consonants": "Protéger les consonnes sourdes",
+ "Safeguard distinct consonants and breathing sounds to prevent electro-acoustic tearing and other artifacts. Pulling the parameter to its maximum value of 0.5 offers comprehensive protection. However, reducing this value might decrease the extent of protection while potentially mitigating the indexing effect.": "Protégez les consonnes distinctes et les sons respiratoires pour éviter les déchirures électroacoustiques et autres artefacts. L’extraction du paramètre à sa valeur maximale de 0,5 offre une protection complète. Toutefois, la réduction de cette valeur peut réduire l’étendue de la protection tout en atténuant potentiellement l’effet d’indexation.",
+ "Pitch extraction algorithm": "Algorithme d’extraction de hauteur",
+ "Pitch extraction algorithm to use for the audio conversion. The default algorithm is rmvpe, which is recommended for most cases.": "Algorithme d’extraction de hauteur à utiliser pour la conversion audio. L’algorithme par défaut est rmvpe, qui est recommandé dans la plupart des cas.",
+
+ "Convert": "Convertir",
+ "Export Audio": "Exporter l’audio",
+
+ "Batch": "Lot",
+ "Input Folder": "Dossier d’entrée",
+ "Select the folder containing the audios to convert.": "Sélectionnez le dossier contenant les audios à convertir.",
+ "Enter input path": "Entrez le chemin d’entrée",
+ "Output Folder": "Dossier de sortie",
+ "Select the folder where the output audios will be saved.": "Sélectionnez le dossier dans lequel les audios de sortie seront enregistrés.",
+ "Enter output path": "Entrez le chemin de sortie",
+
+ "Get information about the audio": "Obtenir des informations sur l’audio",
+
+ "## Voice Blender": "## Mélangeur de voix",
+ "Select two voice models, set your desired blend percentage, and blend them into an entirely new voice.": "Sélectionnez deux modèles de voix, définissez le pourcentage de fusion souhaité et mélangez-les en une toute nouvelle voix.",
+ "Voice Blender": "Mélangeur de voix",
+ "Drag and drop your model here": "Faites glisser et déposez votre modèle ici",
+ "You can also use a custom path.": "Vous pouvez également utiliser un chemin personnalisé.",
+ "Blend Ratio": "Rapport de mélange",
+ "Adjusting the position more towards one side or the other will make the model more similar to the first or second.": "En ajustant la position d’un côté ou de l’autre, le modèle sera plus similaire au premier ou au second.",
+ "Fusion": "Fusion",
+
+ "Path to Model": "Chemin d’accès au modèle",
+ "Enter path to model": "Entrez le chemin d’accès au modèle",
+ "Model information to be placed": "Informations sur le modèle à placer",
+ "Inroduce the model information": "Introduire les informations du modèle",
+ "The information to be placed in the model (You can leave it blank or put anything).": "Les informations à placer dans le modèle (vous pouvez les laisser vides ou mettre n’importe quoi).",
+ "View model information": "Afficher les informations sur le modèle",
+ "Introduce the model pth path": "Présentation du modèle pth path",
+ "View": "Vue",
+ "Model extraction": "Extraction de modèles",
+ "Model conversion": "Conversion de modèle",
+ "Pth file": "Fichier Pth",
+ "Output of the pth file": "Sortie du fichier pth",
+
+ "Extract F0 Curve": "Extraire la courbe F0",
+ "The f0 curve represents the variations in the base frequency of a voice over time, showing how pitch rises and falls.": "La courbe f0 représente les variations de la fréquence de base d’une voix au fil du temps, montrant comment la hauteur monte et descend.",
+
+ "# How to Report an Issue on GitHub": "# Comment signaler un problème sur GitHub",
+ "1. Click on the 'Record Screen' button below to start recording the issue you are experiencing.": "1. Cliquez sur le bouton « Enregistrer l’écran » ci-dessous pour commencer à enregistrer le problème que vous rencontrez.",
+ "2. Once you have finished recording the issue, click on the 'Stop Recording' button (the same button, but the label changes depending on whether you are actively recording or not).": "2. Une fois que vous avez terminé d’enregistrer le numéro, cliquez sur le bouton « Arrêter l’enregistrement » (le même bouton, mais l’étiquette change selon que vous enregistrez activement ou non).",
+ "3. Go to [GitHub Issues](https://github.com/IAHispano/Applio/issues) and click on the 'New Issue' button.": "3. Allez dans [GitHub Issues](https://github.com/IAHispano/Applio/issues) et cliquez sur le bouton « New issue ».",
+ "4. Complete the provided issue template, ensuring to include details as needed, and utilize the assets section to upload the recorded file from the previous step.": "4. Complétez le modèle de problème fourni, en veillant à inclure les détails nécessaires, et utilisez la section des ressources pour envoyer le fichier enregistré de l’étape précédente.",
+
+ "Record Screen": "Écran d’enregistrement",
+ "Record": "Enregistrer",
+ "Stop Recording": "Arrêter l’enregistrement",
+
+ "Introduce the model .pth path": "Présentation du chemin d’accès .pth du modèle",
+ "See Model Information": "Voir les informations sur le modèle",
+
+ "## Download Model": "## Télécharger le modèle",
+ "Model Link": "Lien vers le modèle",
+ "Introduce the model link": "Introduire le lien du modèle",
+ "Download Model": "Télécharger le modèle",
+ "## Drop files": "## Déposer des fichiers",
+ "Drag your .pth file and .index file into this space. Drag one and then the other.": "Faites glisser vos fichiers .pth et .index dans cet espace. Faites glisser l’un, puis l’autre.",
+ "## Search Model": "## Modèle de recherche",
+ "Search": "Recherche",
+ "Introduce the model name to search.": "Introduisez le nom du modèle à rechercher.",
+ "We couldn't find models by that name.": "Nous n’avons pas pu trouver de modèles portant ce nom.",
+ "## Download Pretrained Models": "## Télécharger des modèles pré-entraînés",
+ "Select the pretrained model you want to download.": "Sélectionnez le modèle pré-entraîné que vous souhaitez télécharger.",
+ "And select the sampling rate": "Et sélectionnez le taux d’échantillonnage",
+
+ "TTS Voices": "Voix TTS",
+ "TTS Speed": "Vitesse TTS",
+ "Increase or decrease TTS speed.": "Augmenter ou diminuer la vitesse du TTS.",
+ "Select the TTS voice to use for the conversion.": "Sélectionnez la voix TTS à utiliser pour la conversion.",
+ "Text to Synthesize": "Texte à synthétiser",
+ "Enter the text to synthesize.": "Saisissez le texte à synthétiser.",
+ "Or you can upload a .txt file": "Vous pouvez également utiliser un fichier .txt",
+ "Enter text to synthesize": "Saisir le texte à synthétiser",
+ "Output Path for TTS Audio": "Chemin de sortie pour l’audio TTS",
+ "Output Path for RVC Audio": "Chemin de sortie pour l’audio RVC",
+
+ "Enable Applio integration with Discord presence": "Activer l’intégration d’Applio avec la présence de Discord",
+ "It will activate the possibility of displaying the current Applio activity in Discord.": "Il activera la possibilité d’afficher l’activité actuelle d’Applio dans Discord.",
+ "Enable Applio integration with applio.org/models using flask": "Activer l’intégration d’Applio avec applio.org/models à l’aide de flask",
+ "It will activate the possibility of downloading models with a click from the website.": "Il activera la possibilité de télécharger des modèles en un clic depuis le site web.",
+ "Enable fake GPU": "Activer le faux GPU",
+ "Training is currently unsupported due to the absence of a GPU. To activate the training tab, navigate to the settings tab and enable the 'Fake GPU' option.": "La formation n’est actuellement pas prise en charge en raison de l’absence de GPU. Pour activer l’onglet de formation, accédez à l’onglet Paramètres et activez l’option « Faux GPU ».",
+ "Activates the train tab. However, please note that this device lacks GPU capabilities, hence training is not supported. This option is only for testing purposes. (This option will restart Applio)": "Active l’onglet train. Cependant, veuillez noter que cet appareil ne dispose pas de capacités GPU et que la formation n’est donc pas prise en charge. Cette option est uniquement destinée à des fins de test. (Cette option redémarrera Applio)",
+ "Theme": "Thème",
+ "Select the theme you want to use. (Requires restarting Applio)": "Sélectionnez le thème que vous souhaitez utiliser. (Nécessite le redémarrage d’Applio)",
+ "Precision": "Précision",
+ "Select the precision you want to use for training and inference.": "Sélectionnez la précision que vous souhaitez utiliser pour la formation et l’inférence.",
+ "Update precision": "Mettre à jour la précision",
+ "Language": "Langue",
+ "Select the language you want to use. (Requires restarting Applio)": "Sélectionnez la langue que vous souhaitez utiliser. (Nécessite le redémarrage d’Applio)",
+
+ "Plugin Installer": "Installateur de plugin",
+ "Drag your plugin.zip to install it": "Faites glisser votre plugin.zip pour l’installer",
+
+ "Version Checker": "Vérificateur de version",
+ "Check which version of Applio is the latest to see if you need to update.": "Vérifiez quelle version d’Applio est la plus récente pour voir si vous devez effectuer une mise à jour.",
+ "Check for updates": "Vérifier les mises à jour"
+}
diff --git a/assets/i18n/languages/gu_GU.json b/assets/i18n/languages/gu_GU.json
new file mode 100644
index 0000000000000000000000000000000000000000..aef1885845c8a9815c53b5fcd494a2f85acedf76
--- /dev/null
+++ b/assets/i18n/languages/gu_GU.json
@@ -0,0 +1,175 @@
+{
+ "VITS-based Voice Conversion focused on simplicity, quality and performance.": "અંતિમ વોઇસ ક્લોનિંગ ટૂલ, જે અજોડ શક્તિ, મોડ્યુલારિટી અને વપરાશકર્તા-મૈત્રીપૂર્ણ અનુભવ માટે સાવચેતીપૂર્વક ઓપ્ટિમાઇઝ કરવામાં આવ્યું છે.",
+ "This section contains some extra utilities that often may be in experimental phases.": "આ વિભાગમાં કેટલીક વધારાની ઉપયોગિતાઓ છે જે ઘણીવાર પ્રાયોગિક તબક્કામાં હોઈ શકે છે.",
+ "Output Information": "આઉટપુટ જાણકારી",
+ "The output information will be displayed here.": "આઉટપુટ જાણકારી અંહિ દર્શાવવામાં આવશે.",
+ "Inference": "અનુમાન",
+ "Train": "રેલગાડી",
+ "Extra": "વધારાનું",
+ "Merge Audios": "ઓડિયો ભેગા કરો",
+ "Processing": "પ્રક્રિયા કરી રહ્યા છીએ",
+ "Audio Analyzer": "ઓડિયો વિશ્લેષક",
+ "Model Information": "મોડેલ જાણકારી",
+ "Plugins": "પ્લગઇનો",
+ "Download": "ડાઉનલોડ",
+ "Report a Bug": "ભૂલનો અહેવાલ આપો",
+ "Settings": "સુયોજનો",
+ "Preprocess": "પ્રીપ્રોસેસ",
+ "Model Name": "મોડેલ નામ",
+ "Name of the new model.": "નવા મોડેલનું નામ.",
+ "Enter model name": "મોડેલ નામ દાખલ કરો",
+ "Dataset Path": "ડેટાસેટ પાથ",
+ "Path to the dataset folder.": "ડેટાસેટ ફોલ્ડરનો પાથ.",
+ "Refresh Datasets": "ડેટાસેટ્સ પુનઃતાજું કરો",
+ "Dataset Creator": "ડેટાસેટ બનાવનાર",
+ "Dataset Name": "ડેટાસેટ નામ",
+ "Name of the new dataset.": "નવા ડેટાસેટનું નામ.",
+ "Enter dataset name": "ડેટાસેટ નામ દાખલ કરો",
+ "Upload Audio Dataset": "ઓડિયો ડેટાસેટ અપલોડ કરો",
+ "The audio file has been successfully added to the dataset. Please click the preprocess button.": "ઓડિયો ફાઈલ સફળતાપૂર્વક ડેટાસેટમાં ઉમેરાઈ ગઈ છે. કૃપા કરીને પ્રીપ્રોસેસ બટન પર ક્લિક કરો.",
+ "Enter dataset path": "ડેટાસેટ પાથને દાખલ કરો",
+ "Sampling Rate": "નમૂનાનો દર",
+ "The sampling rate of the audio files.": "ઓડિયો ફાઈલોનો નમૂનાનો દર.",
+ "Model Architecture": "RVC આવૃત્તિ",
+ "Version of the model architecture.": "મોડેલનું આરવીસી વર્ઝન.",
+ "Preprocess Dataset": "પ્રીપ્રોસેસ ડેટાસેટ",
+ "Extract": "અર્ક કાઢો",
+ "Hop Length": "હોપ લંબાઈ",
+ "Denotes the duration it takes for the system to transition to a significant pitch change. Smaller hop lengths require more time for inference but tend to yield higher pitch accuracy.": "સિસ્ટમને નોંધપાત્ર પિચ પરિવર્તનમાં સંક્રમણ કરવામાં જે સમયગાળો લાગે છે તે સૂચવે છે. નાના હોપની લંબાઈને અનુમાન માટે વધુ સમયની જરૂર હોય છે પરંતુ ઉચ્ચ પિચ ચોકસાઈ પ્રાપ્ત કરવાનું વલણ ધરાવે છે.",
+ "Batch Size": "બેચ માપ",
+ "It's advisable to align it with the available VRAM of your GPU. A setting of 4 offers improved accuracy but slower processing, while 8 provides faster and standard results.": "તેને તમારા જીપીયુના ઉપલબ્ધ વીઆરએએમ સાથે સંરેખિત કરવાની સલાહ આપવામાં આવે છે. 4નું સેટિંગ સુધારેલી ચોકસાઈ આપે છે પરંતુ પ્રક્રિયા ધીમી છે, જ્યારે 8 ઝડપી અને પ્રમાણભૂત પરિણામો પ્રદાન કરે છે.",
+ "Save Every Epoch": "દરેક ઈપોકનો સંગ્રહ કરો",
+ "Determine at how many epochs the model will saved at.": "મોડેલ કેટલા યુગમાં સંગ્રહાશે તે નક્કી કરો.",
+ "Total Epoch": "કુલ ઈપોક",
+ "Specifies the overall quantity of epochs for the model training process.": "મોડેલ તાલીમ પ્રક્રિયા માટે યુગોનો એકંદર જથ્થો સ્પષ્ટ કરે છે.",
+ "Pretrained": "પૂર્વપ્રશિક્ષિત",
+ "Save Only Latest": "ફક્ત તાજેતરનાં ને સંગ્રહો",
+ "Enabling this setting will result in the G and D files saving only their most recent versions, effectively conserving storage space.": "આ ગોઠવણને સક્રિય કરવાથી જી અને ડી (G) અને ડી (D) ફાઇલો માત્ર તેમના તાજેતરના વર્ઝનને જ સેવ કરી શકશે, અસરકારક રીતે સ્ટોરેજ સ્પેસનું સંરક્ષણ કરશે.",
+ "Save Every Weights": "દરેક વજનોને બચાવો",
+ "This setting enables you to save the weights of the model at the conclusion of each epoch.": "આ ગોઠવણી તમને દરેક યુગના અંતે મોડેલના વજનને બચાવવા માટે સક્ષમ બનાવે છે.",
+ "Custom Pretrained": "વૈવિધ્યપૂર્ણ પૂર્વટ્રેઈન થયેલ",
+ "Utilizing custom pretrained models can lead to superior results, as selecting the most suitable pretrained models tailored to the specific use case can significantly enhance performance.": "કસ્ટમ પ્રીટ્રેઇન્ડ મોડેલ્સનો ઉપયોગ કરવાથી શ્રેષ્ઠ પરિણામો મળી શકે છે, કારણ કે ચોક્કસ ઉપયોગના કિસ્સાને અનુરૂપ સૌથી યોગ્ય પ્રિટ્રેઇન્ડ મોડેલ્સની પસંદગી કરવાથી કામગીરીમાં નોંધપાત્ર વધારો થઈ શકે છે.",
+ "Upload Pretrained Model": "પહેલેથી પ્રશિક્ષિત મોડેલ અપલોડ કરો",
+ "Refresh Custom Pretraineds": "કસ્ટમ પૂર્વપ્રશિક્ષિતોને તાજુ કરો",
+ "Pretrained Custom Settings": "પહેલેથી પ્રશિક્ષિત વૈવિધ્યપૂર્ણ સુયોજનો",
+ "The file you dropped is not a valid pretrained file. Please try again.": "તમે મૂકેલી ફાઇલ એ યોગ્ય પૂર્વતાલીમવાળી ફાઇલ નથી. કૃપા કરીને ફરી પ્રયાસ કરો.",
+ "Click the refresh button to see the pretrained file in the dropdown menu.": "ડ્રોપડાઉન મેનુમાં પહેલેથી તાલીમ લીધેલ ફાઈલ જોવા માટે રિફ્રેશ બટન પર ક્લિક કરો.",
+ "Pretrained G Path": "વૈવિધ્યપૂર્ણ પૂર્વપ્રશિક્ષિત G",
+ "Pretrained D Path": "વૈવિધ્યપૂર્ણ પૂર્વપ્રશિક્ષિત D",
+ "GPU Settings": "GPU સુયોજનો",
+ "Sets advanced GPU settings, recommended for users with better GPU architecture.": "અદ્યતન GPU ગોઠવણો સુયોજિત કરે છે, જે વધુ સારા GPU આર્કિટેક્ચર ધરાવતા વપરાશકર્તાઓ માટે આગ્રહણીય છે.",
+ "GPU Custom Settings": "GPU કસ્ટમ સુયોજનો",
+ "GPU Number": "GPU નંબર",
+ "0 to ∞ separated by -": "0 થી ∞ આના દ્વારા અલગ પાડવામાં આવે છે -",
+ "GPU Information": "GPU જાણકારી",
+ "Pitch Guidance": "પિચ માર્ગદર્શન",
+ "By employing pitch guidance, it becomes feasible to mirror the intonation of the original voice, including its pitch. This feature is particularly valuable for singing and other scenarios where preserving the original melody or pitch pattern is essential.": "પિચ માર્ગદર્શનનો ઉપયોગ કરીને, મૂળ અવાજના રણકારને પ્રતિબિંબિત કરવાનું શક્ય બને છે, જેમાં તેની પીચનો પણ સમાવેશ થાય છે. આ સુવિધા ખાસ કરીને ગાવા અને અન્ય દૃશ્યો માટે મૂલ્યવાન છે જ્યાં મૂળ મેલોડી અથવા પિચ પેટર્નને સાચવવી જરૂરી છે.",
+ "Utilize pretrained models when training your own. This approach reduces training duration and enhances overall quality.": "તમારા પોતાનાને તાલીમ આપતી વખતે પૂર્વપ્રશિક્ષિત મોડેલોનો ઉપયોગ કરો. આ અભિગમ તાલીમનો સમયગાળો ઘટાડે છે અને એકંદર ગુણવત્તામાં વધારો કરે છે.",
+ "Extract Features": "લક્ષણોનો અર્ક કાઢો",
+ "Start Training": "તાલીમ શરૂ કરો",
+ "Generate Index": "અનુક્રમણિકા બનાવો",
+ "Voice Model": "અવાજ મોડેલ",
+ "Select the voice model to use for the conversion.": "રૂપાંતરણ માટે વાપરવા માટે વોઇસ મોડેલ પસંદ કરો.",
+ "Index File": "અનુક્રમણિકા ફાઇલ",
+ "Select the index file to use for the conversion.": "રૂપાંતરણ માટે વાપરવા માટે અનુક્રમણિકા ફાઈલ પસંદ કરો.",
+ "Refresh": "પુનઃતાજું કરો",
+ "Unload Voice": "અવાજ અનલોડ કરો",
+ "Single": "એકલું",
+ "Upload Audio": "ઓડિયો અપલોડ કરો",
+ "Select Audio": "ઓડિયો પસંદ કરો",
+ "Select the audio to convert.": "રૂપાંતરિત કરવા માટે ઓડિયો પસંદ કરો.",
+ "Advanced Settings": "અદ્યતન સુયોજનો",
+ "Clear Outputs (Deletes all audios in assets/audios)": "આઉટપુટ સાફ કરો (સંપત્તિઓ/ઓડિયોમાં બધા ઓડિયો કાઢી નાંખે છે)",
+ "Custom Output Path": "કસ્ટમ આઉટપુટ પાથ",
+ "Output Path": "આઉટપુટ પાથ",
+ "The path where the output audio will be saved, by default in assets/audios/output.wav": "પાથ કે જ્યાં આઉટપુટ ઓડિયો એ મૂળભૂત રીતે assets/audios/output.wav માં સંગ્રહાશે",
+ "Split Audio": "ઓડિયો વિભાજિત કરો",
+ "Split the audio into chunks for inference to obtain better results in some cases.": "કેટલાક કિસ્સાઓમાં વધુ સારા પરિણામો મેળવવા માટે અનુમાન માટે ઓડિઓને ભાગોમાં વિભાજિત કરો.",
+ "Autotune": "Autotune",
+ "Apply a soft autotune to your inferences, recommended for singing conversions.": "તમારા અનુમાનો પર નરમ ઓટોટ્યુન લાગુ કરો, જે ગાવા માટે ભલામણ કરવામાં આવે છે રૂપાંતરણો ગાવા માટે ભલામણ કરવામાં આવે છે.",
+ "Clean Audio": "ઓડિયો સાફ કરો",
+ "Clean your audio output using noise detection algorithms, recommended for speaking audios.": "ઓડિયો બોલવા માટે ભલામણ કરવામાં આવેલા નોઇઝ ડિટેક્શન એલ્ગોરિધમ્સનો ઉપયોગ કરીને તમારા ઓડિયો આઉટપુટને સાફ કરો.",
+ "Clean Strength": "સ્વચ્છ મજબૂતાઈ",
+ "Set the clean-up level to the audio you want, the more you increase it the more it will clean up, but it is possible that the audio will be more compressed.": "તમે ઇચ્છો તે ઓડિયો પર સફાઇનું સ્તર સુયોજિત કરો, તમે તેને જેટલું વધારશો તેટલું તે સાફ થશે, પરંતુ શક્ય છે કે ઓડિયો વધુ સંકુચિત હશે.",
+ "Pitch": "પિચ",
+ "Set the pitch of the audio, the higher the value, the higher the pitch.": "ઓડિયોની પિચ સુયોજિત કરો, કિંમત જેટલી ઊંચી હશે, તેટલી પીચ વધુ ઊંચી હશે.",
+ "Filter Radius": "ફિલ્ટર ત્રિજ્યા",
+ "If the number is greater than or equal to three, employing median filtering on the collected tone results has the potential to decrease respiration.": "જો સંખ્યા ત્રણ કરતા વધારે અથવા સમાન હોય, તો એકત્રિત સ્વર પરિણામો પર મધ્યમ ફિલ્ટરિંગનો ઉપયોગ કરવાથી શ્વસનનક્કી થવાની સંભાવના રહે છે.",
+ "Search Feature Ratio": "શોધ લક્ષણ ગુણોત્તર",
+ "Influence exerted by the index file; a higher value corresponds to greater influence. However, opting for lower values can help mitigate artifacts present in the audio.": "અનુક્રમણિકા ફાઈલ દ્વારા લાગતો પ્રભાવ; ઉચ્ચતર મૂલ્ય વધુ પ્રભાવને મળતું આવે છે. જો કે, નીચા મૂલ્યોની પસંદગી કરવાથી ઓડિયોમાં હાજર કલાકૃતિઓને ઘટાડવામાં મદદ મળી શકે છે.",
+ "Volume Envelope": "વોલ્યુમ કવરpaper size",
+ "Substitute or blend with the volume envelope of the output. The closer the ratio is to 1, the more the output envelope is employed.": "આઉટપુટના વોલ્યુમ પરબિડીયા સાથે અવેજી અથવા મિશ્રણ કરો. ગુણોત્તર 1 ની નજીક હોય છે, આઉટપુટ પરબિડીયાને વધુ ઉપયોગમાં લેવામાં આવે છે.",
+ "Protect Voiceless Consonants": "અવાજ વગરના વ્યંજનોનું રક્ષણ કરો",
+ "Safeguard distinct consonants and breathing sounds to prevent electro-acoustic tearing and other artifacts. Pulling the parameter to its maximum value of 0.5 offers comprehensive protection. However, reducing this value might decrease the extent of protection while potentially mitigating the indexing effect.": "ઇલેક્ટ્રો-એકોસ્ટિક ફાટી ન જાય અને અન્ય કલાકૃતિઓ ન ફાટી જાય તે માટે વિશિષ્ટ વ્યંજનો અને શ્વાસોચ્છવાસના અવાજનું રક્ષણ કરો. પરિમાણને તેના ૦.૫ ના મહત્તમ મૂલ્ય તરફ ખેંચવું એ વ્યાપક સુરક્ષા પ્રદાન કરે છે. જો કે, આ મૂલ્યમાં ઘટાડો કરવાથી અનુક્રમણિકાની અસરને સંભવિતપણે ઘટાડવાની સાથે સંરક્ષણની હદમાં ઘટાડો થઈ શકે છે.",
+ "Pitch extraction algorithm": "પિચ નિષ્કર્ષણ અલગોરિધમ",
+ "Pitch extraction algorithm to use for the audio conversion. The default algorithm is rmvpe, which is recommended for most cases.": "ઓડિઓ રૂપાંતર માટે વાપરવા માટે પિચ નિષ્કર્ષણ અલ્ગોરિધમનો. મૂળભૂત અલ્ગોરિધમ એ rmvpe છે, જે મોટાભાગના કિસ્સાઓમાં ભલામણ કરવામાં આવે છે.",
+ "Convert": "રૂપાંતર કરો",
+ "Export Audio": "ઓડિયો નિકાસ કરો",
+ "Batch": "બેચ",
+ "Input Folder": "ઇનપુટ ફોલ્ડર",
+ "Select the folder containing the audios to convert.": "ફેરવવા માટે ઓડિયો સમાવતા ફોલ્ડરને પસંદ કરો.",
+ "Enter input path": "ઇનપુટ પાથને દાખલ કરો",
+ "Output Folder": "આઉટપુટ ફોલ્ડર",
+ "Select the folder where the output audios will be saved.": "ફોલ્ડર પસંદ કરો કે જ્યાં આઉટપુટ ઓડિયો સંગ્રહાશે.",
+ "Enter output path": "આઉટપુટ પાથ દાખલ કરો",
+ "Get information about the audio": "ઓડિયો વિશે જાણકારી મેળવો",
+ "Information about the audio file": "ઓડિયો ફાઈલ વિશેની માહિતી",
+ "Waiting for information...": "જાણકારી માટે રાહ જોઇ રહ્યા છીએ...",
+ "## Voice Blender": "## વોઇસ બ્લેન્ડર",
+ "Select two voice models, set your desired blend percentage, and blend them into an entirely new voice.": "બે વોઇસ મોડલ્સ પસંદ કરો, તમારી ઇચ્છિત મિશ્રણ ટકાવારી સેટ કરો અને તેને સંપૂર્ણપણે નવા અવાજમાં મિશ્રિત કરો.",
+ "Voice Blender": "અવાજ બ્લેન્ડર",
+ "Drag and drop your model here": "તમારા મોડેલને અંહિ ખેંચો અને મૂકો",
+ "You can also use a custom path.": "તમે કસ્ટમ પાથનો પણ ઉપયોગ કરી શકો છો.",
+ "Blend Ratio": "મિશ્રણ ગુણોત્તર",
+ "Adjusting the position more towards one side or the other will make the model more similar to the first or second.": "સ્થિતિને એક બાજુ અથવા બીજી બાજુ વધુ સમાયોજિત કરવાથી મોડેલને પ્રથમ અથવા બીજી બાજુ વધુ સમાન બનાવશે.",
+ "Fusion": "ફ્યુઝન",
+ "Path to Model": "મોડેલનો પાથ",
+ "Enter path to model": "મોડેલ માટે પાથને દાખલ કરો",
+ "Model information to be placed": "મૂકવાની મોડેલ માહિતી",
+ "Inroduce the model information": "મોડેલની જાણકારીને ઇનરોડ્યુસ કરો",
+ "The information to be placed in the model (You can leave it blank or put anything).": "મોડેલમાં મૂકવાની માહિતી (તમે તેને ખાલી છોડી શકો છો અથવા કંઈપણ મૂકી શકો છો).",
+ "View model information": "મોડેલ જાણકારી જુઓ",
+ "Introduce the model pth path": "મોડેલ pth પાથનો પરિચય આપો",
+ "View": "જુઓ",
+ "Model extraction": "મોડેલ નિષ્કર્ષણ",
+ "Model conversion": "મોડેલ રૂપાંતરણ",
+ "Pth file": "Pth ફાઈલ",
+ "Output of the pth file": "pth ફાઇલનું આઉટપુટ",
+ "# How to Report an Issue on GitHub": "# GitHub પર કોઈ સમસ્યાની જાણ કેવી રીતે કરવી",
+ "1. Click on the 'Record Screen' button below to start recording the issue you are experiencing.": "1. તમે જે સમસ્યાનો સામનો કરી રહ્યા છો તેને રેકોર્ડ કરવાનું શરૂ કરવા માટે નીચે આપેલા 'રેકોર્ડ સ્ક્રીન' બટન પર ક્લિક કરો.",
+ "2. Once you have finished recording the issue, click on the 'Stop Recording' button (the same button, but the label changes depending on whether you are actively recording or not).": "2. એક વખત તમે સમસ્યાનું રેકોર્ડિંગ પૂરું કરી લો એટલે 'સ્ટોપ રેકોર્ડિંગ' બટન પર ક્લિક કરો (આ જ બટન, પરંતુ તમે સક્રિયપણે રેકોર્ડિંગ કરી રહ્યા છો કે નહીં તેના આધારે લેબલ બદલાય છે).",
+ "3. Go to [GitHub Issues](https://github.com/IAHispano/Applio/issues) and click on the 'New Issue' button.": "3. [GitHub issues] (https://github.com/IAHispano/Applio/issues) પર જાઓ અને 'ન્યૂ ઇશ્યૂ' બટન પર ક્લિક કરો.",
+ "4. Complete the provided issue template, ensuring to include details as needed, and utilize the assets section to upload the recorded file from the previous step.": "4. પૂરી પાડવામાં આવેલ ઇશ્યૂ ટેમ્પલેટ પૂર્ણ કરો, જરૂરિયાત મુજબ વિગતોનો સમાવેશ કરવાની ખાતરી કરો અને અગાઉના પગલામાંથી રેકોર્ડ કરેલી ફાઇલને અપલોડ કરવા માટે અસ્કયામતોના વિભાગનો ઉપયોગ કરો.",
+ "Record Screen": "રેકોર્ડ સ્ક્રીન",
+ "Record": "રેકોર્ડ",
+ "Stop Recording": "રેકોર્ડ કરવાનું બંધ કરો",
+ "Introduce the model .pth path": "મોડલ .pth પાથને રજૂ કરો",
+ "See Model Information": "મોડેલ જાણકારી જુઓ",
+ "## Download Model": "## ડાઉનલોડ મોડેલ",
+ "Model Link": "મોડેલ કડી",
+ "Introduce the model link": "મોડેલ કડીનો પરિચય આપો",
+ "Download Model": "ડાઉનલોડ મોડેલ",
+ "## Drop files": "## ફાઇલો મૂકો",
+ "Drag your .pth file and .index file into this space. Drag one and then the other.": "આ જગ્યામાં તમારી .pth ફાઇલ અને .index ફાઇલને ખેંચો. એકને ખેંચો અને પછી બીજું.",
+ "TTS Voices": "TTS અવાજો",
+ "Select the TTS voice to use for the conversion.": "રૂપાંતરણ માટે વાપરવા માટે TTS અવાજ પસંદ કરો.",
+ "Text to Synthesize": "સંશ્લેષણ કરવા માટેનું લખાણ",
+ "Enter the text to synthesize.": "સંશ્લેષણ કરવા માટે લખાણ દાખલ કરો.",
+ "Or you can upload a .txt file": "અથવા તમે .txt ફાઇલ અપલોડ કરી શકો છો",
+ "Enter text to synthesize": "સંશ્લેષણ કરવા માટે લખાણ દાખલ કરો",
+ "Output Path for TTS Audio": "TTS ઓડિયો માટે આઉટપુટ પાથ",
+ "Output Path for RVC Audio": "RVC ઓડિયો માટે આઉટપુટ પાથ",
+ "Enable Applio integration with Discord presence": "ડિસ્કોર્ડ હાજરી સાથે એપ્લિઓ સંકલન સક્રિય કરો",
+ "It will activate the possibility of displaying the current Applio activity in Discord.": "તે ડિસ્કોર્ડમાં વર્તમાન એપ્લિયો પ્રવૃત્તિ પ્રદર્શિત કરવાની સંભાવનાને સક્રિય કરશે.",
+ "Enable Applio integration with applio.org/models using flask": "ફ્લાસ્કની મદદથી applio.org/models સાથે એપ્લિયો સંકલન સક્રિય કરો",
+ "It will activate the possibility of downloading models with a click from the website.": "તે વેબસાઇટ પરથી એક ક્લિક સાથે મોડેલો ડાઉનલોડ કરવાની સંભાવનાને સક્રિય કરશે.",
+ "Theme": "થીમ",
+ "Select the theme you want to use. (Requires restarting Applio)": "થીમ પસંદ કરો જે તમે વાપરવા માંગો છો. (એપ્લિયોને ફરી શરૂ કરવાની જરૂર છે)",
+ "Language": "ભાષા",
+ "Select the language you want to use. (Requires restarting Applio)": "તમે જે ભાષા વાપરવા માંગો તે પસંદ કરો. (એપ્લિયોને ફરી શરૂ કરવાની જરૂર છે)",
+ "Plugin Installer": "પ્લગઇન સ્થાપનાર",
+ "Drag your plugin.zip to install it": "તેને સ્થાપિત કરવા માટે તમારા plugin.zip ખેંચો",
+ "Version Checker": "આવૃત્તિ ચકાસનાર",
+ "Check which version of Applio is the latest to see if you need to update.": "તમારે અપડેટ કરવાની જરૂર છે કે નહીં તે જોવા માટે એપ્લિઓનું કયું સંસ્કરણ નવીનતમ છે તે તપાસો.",
+ "Check for updates": "સુધારાઓ માટે ચકાસો"
+}
\ No newline at end of file
diff --git a/assets/i18n/languages/hi_IN.json b/assets/i18n/languages/hi_IN.json
new file mode 100644
index 0000000000000000000000000000000000000000..affef2a5eb813a9c8c46c5fd94678b030262688e
--- /dev/null
+++ b/assets/i18n/languages/hi_IN.json
@@ -0,0 +1,215 @@
+{
+ "VITS-based Voice Conversion focused on simplicity, quality and performance.": "अल्टीमेट वॉयस क्लोनिंग टूल, बेजोड़ पॉवर, मॉड्यूलरिटी और उपयोगकर्ता-अनुकूल अनुभव के लिए बारीकी से ऑप्टिमाइज़ किया गया।\n[हिन्दी अनुवाद: Enes](https://discord.com/users/1140031358006202468)",
+ "This section contains some extra utilities that often may be in experimental phases.": "इस खंड में कुछ अतिरिक्त उपयोगिताएँ होती हैं जो अक्सर प्रायोगिक चरणों में हो सकती हैं।",
+ "Output Information": "आउटपुट जानकारी",
+ "The output information will be displayed here.": "आउटपुट जानकारी यहाँ प्रदर्शित की जाएगी।",
+
+ "Inference": "निष्कर्ष",
+ "Train": "ट्रेन",
+ "Extra": "अतिरिक्त",
+ "Merge Audios": "इस ऑडियो को मर्ज करें",
+ "Processing": "प्रोसेसिंग",
+ "Audio Analyzer": "ऑडियो एनालाइज़र",
+ "Model Information": "मॉडल जानकारी",
+ "Plugins": "प्लगइन्स",
+ "Download": "डाउनलोड करें",
+ "Report a Bug": "एक बग की रिपोर्ट करें",
+ "Settings": "सेटिंग्स",
+
+ "Preprocess": "पूर्व-प्रसंस्करण",
+ "Model Name": "मॉडल का नाम",
+ "Name of the new model.": "नए मॉडल का नाम।",
+ "Enter model name": "मॉडल नाम डालें",
+ "Dataset Path": "डेटासेट पथ",
+ "Path to the dataset folder.": "डेटासेट फ़ोल्डर का पथ।",
+ "Refresh Datasets": "डेटासेट रीफ्रेश करें",
+ "Dataset Creator": "डेटासेट बनाने वाला",
+ "Dataset Name": "डेटासेट का नाम",
+ "Name of the new dataset.": "नए डेटासेट का नाम।",
+ "Enter dataset name": "डेटासेट का नाम डालें",
+ "Upload Audio Dataset": "ऑडियो डेटासेट अपलोड करें",
+ "The audio file has been successfully added to the dataset. Please click the preprocess button.": "ऑडियो फ़ाइल को डेटासेट में सफलतापूर्वक जोड़ा गया है। कृपया प्रीप्रोसेस बटन पर क्लिक करें।",
+ "Enter dataset path": "डेटासेट पथ डालें",
+ "Sampling Rate": "नमूनाकरण दर",
+ "The sampling rate of the audio files.": "ऑडियो फ़ाइलों की नमूनाकरण दर।",
+ "Model Architecture": "RVC वर्शन",
+ "Version of the model architecture.": "मॉडल का RVC वर्शन।",
+ "Preprocess Dataset": "डेटासेट का पूर्व-प्रसंस्करण करें",
+
+ "Embedder Model": "एम्बेडर मॉडल",
+ "Model used for learning speaker embedding.": "स्पीकर एम्बेडिंग सीखने के लिए उपयोग किया जाने वाला मॉडल।",
+ "Extract": "एक्सट्रैक्ट",
+ "Hop Length": "हॉप लंबाई",
+ "Denotes the duration it takes for the system to transition to a significant pitch change. Smaller hop lengths require more time for inference but tend to yield higher pitch accuracy.": "यह अवधी को दर्शाती है जिसे सिस्टम को पिच में महत्वपूर्ण बदलाव के लिए ले जाना पड़ता है। कम हॉप लंबाई को अनुमान लगाने में अधिक समय लगता है लेकिन उच्च पिच सटीकता मिलती है।",
+ "Batch Size": "बैच आकार",
+ "It's advisable to align it with the available VRAM of your GPU. A setting of 4 offers improved accuracy but slower processing, while 8 provides faster and standard results.": "अपने GPU की उपलब्ध VRAM के साथ इसे संरेखित करना उचित है। 4 की सेटिंग में सटीकता बेहतर होती है लेकिन प्रोसेसिंग धीमी होती है, जबकि 8 तेज और मानक परिणाम प्रदान करता है।",
+ "Save Every Epoch": "प्रत्येक युग को सेव करें",
+ "Determine at how many epochs the model will saved at.": "निर्धारित करें कि कितने युग पर मॉडल सहेजा जाएगा",
+ "Total Epoch": "कुल युग",
+ "Specifies the overall quantity of epochs for the model training process.": "मॉडल प्रशिक्षण प्रक्रिया के लिए युग की समग्र मात्रा निर्दिष्ट करता है।",
+ "Pretrained": "पूर्व प्रशिक्षित",
+ "Save Only Latest": "केवल नवीनतम को सेव करें",
+ "Enabling this setting will result in the G and D files saving only their most recent versions, effectively conserving storage space.": "इस सेटिंग को सक्षम करने पर G और D फ़ाइलें अपने केवल नवीनतम संस्करण को ही सेव करेंगी, भंडारण स्थान को प्रभावी ढंग से संरक्षित करना।",
+ "Save Every Weights": "प्रत्येक वज़न को सेव करें",
+ "This setting enables you to save the weights of the model at the conclusion of each epoch.": "यह सेटिंग आपको प्रत्येक युग के समापन पर मॉडल के वजन को सेव करने में सक्षम बनाती है।",
+ "Custom Pretrained": "कस्टम पूर्व प्रशिक्षित",
+ "Utilizing custom pretrained models can lead to superior results, as selecting the most suitable pretrained models tailored to the specific use case can significantly enhance performance.": "कस्टम पूर्व प्रशिक्षित मॉडलों का उपयोग करने से बेहतर परिणाम मिल सकते हैं, क्योंकि विशिष्ट उपयोग के मामले के अनुरूप सबसे उपयुक्त पूर्व प्रशिक्षित मॉडलों को चुनने से प्रदर्शन में काफी वृद्धि हो सकती है।",
+ "Upload Pretrained Model": "पूर्व प्रशिक्षित मॉडल अपलोड करें",
+ "Refresh Custom Pretraineds": "कस्टम पूर्व प्रशिक्षितों को ताज़ा करें",
+ "Pretrained Custom Settings": "पूर्व प्रशिक्षित कस्टम सेटिंग्स",
+ "The file you dropped is not a valid pretrained file. Please try again.": "जो फ़ाइल आपने छोड़ी है वह एक मान्य पूर्व प्रशिक्षित फ़ाइल नहीं है। कृपया पुनः प्रयास करें।",
+ "Click the refresh button to see the pretrained file in the dropdown menu.": "ड्रॉपडाउन मेनू में पूर्व प्रशिक्षित फ़ाइल को देखने के लिए रीफ़्रेश बटन पर क्लिक करें।",
+ "Pretrained G Path": "कस्टम पूर्व प्रशिक्षित G",
+ "Pretrained D Path": "कस्टम पूर्व प्रशिक्षित D",
+ "GPU Settings": "GPU सेटिंग्स",
+ "Sets advanced GPU settings, recommended for users with better GPU architecture.": "उन्नत GPU सेटिंग्स सेट करता है, बेहतर GPU आर्किटेक्चर वाले उपयोगकर्ताओं के लिए अनुशंसित।",
+ "GPU Custom Settings": "GPU कस्टम सेटिंग्स",
+ "GPU Number": "GPU संख्या",
+ "0 to ∞ separated by -": "0 से ∞ तक अलग से -",
+ "GPU Information": "GPU जानकारी",
+ "Pitch Guidance": "पिच मार्गदर्शन",
+ "By employing pitch guidance, it becomes feasible to mirror the intonation of the original voice, including its pitch. This feature is particularly valuable for singing and other scenarios where preserving the original melody or pitch pattern is essential.": "पिच मार्गदर्शन को नियोजित करके, मूल आवाज़ के स्वर को प्रतिबिंबित करना संभव हो जाता है। यह सुविधा गायन और अन्य परिदृश्य के लिए विशेष रूप से मूल्यवान है जहाँ मूल राग या पिच पैटर्न को संरक्षित करना आवश्यक है।",
+ "Utilize pretrained models when training your own. This approach reduces training duration and enhances overall quality.": "अपना खुद का प्रशिक्षण करते समय पूर्व प्रशिक्षित मॉडलों का उपयोग करें। यह दृष्टिकोण प्रशिक्षण की अवधि कम करता है और समग्र गुणवत्ता को बढ़ाता है।",
+ "Extract Features": "विशेषताएँ निकालें",
+ "Overtraining Detector": "ओवरट्रेनिंग डिटेक्टर",
+ "Detect overtraining to prevent the model from learning the training data too well and losing the ability to generalize to new data.": "मॉडल को प्रशिक्षण डेटा को बहुत अच्छी तरह से सीखने और नए डेटा को सामान्य बनाने की क्षमता खोने से रोकने के लिए ओवरट्रेनिंग का पता लगाएं।",
+ "Overtraining Detector Settings": "ओवरट्रेनिंग डिटेक्टर सेटिंग्स",
+ "Overtraining Threshold": "ओवरट्रेनिंग थ्रेशोल्ड",
+ "Set the maximum number of epochs you want your model to stop training if no improvement is detected.": "यदि कोई सुधार नहीं पाया जाता है, तो आप अपने मॉडल को प्रशिक्षण बंद करने के लिए अधिकतम युगों की संख्या निर्धारित करें।",
+
+ "Start Training": "प्रशिक्षण शुरू करें",
+ "Stop Training & Restart Applio": "प्रशिक्षण रोकें और Applio को पुनः आरंभ करें",
+ "Generate Index": "इंडेक्स बनाएँ",
+
+ "Export Model": "मॉडल निर्यात करें",
+ "The button 'Upload' is only for google colab: Uploads the exported files to the ApplioExported folder in your Google Drive.": "'अपलोड' बटन केवल गूगल कोलाब के लिए है: निर्यात की गई फ़ाइलों को आपके Google ड्राइव में ApplioExported फ़ोल्डर में अपलोड करता है।",
+ "Exported Pth file": "निर्यात की गई Pth फ़ाइल",
+ "Exported Index file": "निर्यात की गई इंडेक्स फ़ाइल",
+ "Select the pth file to be exported": "निर्यात की जाने वाली pth फ़ाइल का चयन करें",
+ "Select the index file to be exported": "निर्यात की जाने वाली इंडेक्स फ़ाइल का चयन करें",
+ "Upload": "अपलोड करें",
+
+ "Voice Model": "आवाज़ का मॉडल",
+ "Select the voice model to use for the conversion.": "रूपांतरण के लिए उपयोग करने के लिए आवाज मॉडल का चयन करें।",
+ "Index File": "इंडेक्स फ़ाइल",
+ "Select the index file to use for the conversion.": "रूपांतरण के लिए उपयोग करने के लिए इंडेक्स फ़ाइल का चयन करें।",
+ "Refresh": "ताज़ा करें",
+ "Unload Voice": "आवाज़ अनलोड करें",
+
+ "Single": "सिंगल",
+ "Upload Audio": "ऑडियो अपलोड करें",
+ "Select Audio": "ऑडियो चुनें",
+ "Select the audio to convert.": "रूपांतरित करने के लिए ऑडियो चुनें।",
+ "Advanced Settings": "उन्नत सेटिंग्स",
+ "Clear Outputs (Deletes all audios in assets/audios)": "(assets/audios में सभी ऑडियो को हटाता है) आउटपुट साफ़ करें",
+ "Custom Output Path": "कस्टम आउटपुट पथ",
+ "Output Path": "आउटपुट पथ",
+ "The path where the output audio will be saved, by default in assets/audios/output.wav": "आउटपुट ऑडियो का पथ जहाँ उसे सेव किया जाएगा, डिफ़ॉल्ट रूप से assets/audios/output.wav में",
+ "Split Audio": "ऑडियो को विभाजित करें",
+ "Split the audio into chunks for inference to obtain better results in some cases.": "कुछ स्थितियों में अधिक बेहतर परिणाम प्राप्त करने के लिए अनुमान के लिए ऑडियो को हिस्सों में विभाजित करें।",
+ "Autotune": "ऑटोट्यून",
+ "Apply a soft autotune to your inferences, recommended for singing conversions.": "अपने अनुमानों पर एक नरम ऑटोट्यून लागू करें, यह गायन रूपांतरण के लिए अनुशंसित है।",
+ "Clean Audio": "ऑडियो साफ़ करें",
+ "Clean your audio output using noise detection algorithms, recommended for speaking audios.": "शोर का पता लगाने वाले एल्गोरिदम का उपयोग करके अपने ऑडियो आउटपुट को साफ़ करें, बोलने वाले ऑडियो के लिए अनुशंसित।",
+ "Clean Strength": "सफाई शक्ति",
+ "Set the clean-up level to the audio you want, the more you increase it the more it will clean up, but it is possible that the audio will be more compressed.": "ऑडियो को साफ़ करने का स्तर निर्धारित करें, जैसे-जैसे आप इसे बढ़ाते जाएँगे यह उतना ही अधिक साफ़ करेगा, लेकिन यह संभव है कि ऑडियो और संकुचित हो सकता है।",
+ "Pitch": "पिच",
+ "Set the pitch of the audio, the higher the value, the higher the pitch.": "ऑडियो की पिच सेट करें, मान जितना अधिक होगा, पिच उतनी ही अधिक होगी।",
+ "Filter Radius": "फ़िल्टर त्रिज्या",
+ "If the number is greater than or equal to three, employing median filtering on the collected tone results has the potential to decrease respiration.": "यदि संख्या तीन से अधिक या उसके बराबर है, तो एकत्र किए गए स्वर परिणामों पर मेडियन फ़िल्टरिंग का उपयोग करके साँस लेना कम हो जाता है।",
+ "Search Feature Ratio": "फीचर अनुपात खोजें",
+ "Influence exerted by the index file; a higher value corresponds to greater influence. However, opting for lower values can help mitigate artifacts present in the audio.": "इंडेक्स फ़ाइल द्वारा प्रभावित; एक उच्च मान अधिक प्रभाव से मेल खाता है। हालाँकि, कम मूल्यों को चुनने से ऑडियो में मौजूद कलाकृतियों को कम करने में मदद मिल सकती है।",
+ "Volume Envelope": "आयतन आवरण",
+ "Substitute or blend with the volume envelope of the output. The closer the ratio is to 1, the more the output envelope is employed.": "आउटपुट के आयतन आवरण के साथ स्थानापन्न करें या सम्मिश्रित करें। अनुपात जितना 1 के करीब होगा, आउटपुट आवरण उतना ही अधिक नियोजित किया जाएगा।",
+ "Protect Voiceless Consonants": "वॉयसलेस व्यंजनों की सुरक्षा करें",
+ "Safeguard distinct consonants and breathing sounds to prevent electro-acoustic tearing and other artifacts. Pulling the parameter to its maximum value of 0.5 offers comprehensive protection. However, reducing this value might decrease the extent of protection while potentially mitigating the indexing effect.": "इलेक्ट्रो-ध्वनिक फाड़ और अन्य कलाकृतियों को रोकने के लिए अलग-अलग व्यंजन और साँस लेने की आवाज़ को सुरक्षित रखें। पैरामीटर को उसके अधिकतम मान 0.5 तक खींचना व्यापक सुरक्षा प्रदान करता है। हालाँकि, इस मान को कम करने से सुरक्षा की सीमा कम हो सकती है जबकि संभावित रूप से अनुक्रमण प्रभाव कम हो सकता है।",
+ "Pitch extraction algorithm": "पिच निष्कर्षण एल्गोरिदम",
+ "Pitch extraction algorithm to use for the audio conversion. The default algorithm is rmvpe, which is recommended for most cases.": "ऑडियो रूपांतरण के लिए उपयोग करने के लिए पिच निष्कर्षण एल्गोरिदम। डिफ़ॉल्ट एल्गोरिथम rmvpe है, जो अधिकांश मामलों के लिए अनुशंसित है।",
+
+ "Convert": "रूपांतरित करें",
+ "Export Audio": "ऑडियो एक्सपोर्ट करें",
+
+ "Batch": "बैच",
+ "Input Folder": "इनपुट फ़ोल्डर",
+ "Select the folder containing the audios to convert.": "रूपांतरित करने के लिए ऑडियो वाली फ़ोल्डर का चयन करें।",
+ "Enter input path": "इनपुट पथ दर्ज करें",
+ "Output Folder": "आउटपुट फ़ोल्डर",
+ "Select the folder where the output audios will be saved.": "फ़ोल्डर का चयन करें जहाँ आउटपुट ऑडियो को सेव किया जाएगा।",
+ "Enter output path": "आउटपुट पथ दर्ज करें",
+
+ "Get information about the audio": "ऑडियो के बारे में जानकारी प्राप्त करें",
+
+ "## Voice Blender": "## Voice Blender",
+ "Select two voice models, set your desired blend percentage, and blend them into an entirely new voice.": "दो आवाज़ के मॉडल का चयन करें, अपना वांछित मिश्रित प्रतिशत सेट करें, और उन्हें एक पूर्ण रूप से नई आवाज़ में सम्मिश्रित करें।",
+ "Voice Blender": "Voice Blender",
+ "Drag and drop your model here": "यहाँ अपना मॉडल ड्रैग एंड ड्रॉप करें",
+ "You can also use a custom path.": "आप कस्टम पथ का भी उपयोग कर सकते हैं।",
+ "Blend Ratio": "मिश्रित अनुपात",
+ "Adjusting the position more towards one side or the other will make the model more similar to the first or second.": "स्थिति को एक तरफ या दूसरी तरफ समायोजित करने से माॅडल पहले या दूसरे के समान हो जाएगा।",
+ "Fusion": "सम्मिश्रण",
+
+ "Path to Model": "मॉडल का पथ",
+ "Enter path to model": "मॉडल का पथ दर्ज करें",
+ "Model information to be placed": "मॉडल की जानकारी स्थान की जाएगी",
+ "Inroduce the model information": "मॉडल की जानकारी का परिचय दें।",
+ "The information to be placed in the model (You can leave it blank or put anything).": "मॉडल में रखी जाने वाली जानकारी (आप इसे खाली छोड़ सकते हैं या कुछ भी डाल सकते हैं)।",
+ "View model information": "मॉडल की जानकारी देखें",
+ "Introduce the model pth path": "मॉडल pth पथ का परिचय दें",
+ "View": "देखें",
+ "Model extraction": "मॉडल निष्कर्षण",
+ "Model conversion": "मॉडल रूपांतरण",
+ "Pth file": "Pth फ़ाइल",
+ "Output of the pth file": "Pth फ़ाइल का आउटपुट",
+
+ "# How to Report an Issue on GitHub": "# GitHub पर किसी समस्या की रिपोर्ट कैसे करें",
+ "1. Click on the 'Record Screen' button below to start recording the issue you are experiencing.": "1.आपके द्वारा अनुभव की जा रही समस्या को रिकॉर्ड करना शुरू करने के लिए नीचे दिए गए 'रिकॉर्ड स्क्रीन' बटन पर क्लिक करें।",
+ "2. Once you have finished recording the issue, click on the 'Stop Recording' button (the same button, but the label changes depending on whether you are actively recording or not).": "2. एक बार जब आप समस्या की रिकॉर्डिंग समाप्त कर लें, तो 'स्टॉप रिकॉर्डिंग' बटन पर क्लिक करें (वही बटन, लेकिन लेबल इस पर निर्भर करता है कि आप सक्रिय रूप से रिकॉर्डिंग कर रहे हैं या नहीं)।",
+ "3. Go to [GitHub Issues](https://github.com/IAHispano/Applio/issues) and click on the 'New Issue' button.": "3. [GitHub Issues](https://github.com/IAHispano/Applio/issues) पर जाएँ और 'New Issue' बटन पर क्लिक करें।",
+ "4. Complete the provided issue template, ensuring to include details as needed, and utilize the assets section to upload the recorded file from the previous step.": "4. दिए गए समस्या टेम्पलेट को पूरा करें, आवश्यकतानुसार विवरण शामिल करना सुनिश्चित करें, और पिछले चरण से रिकॉर्ड की गई फ़ाइल को अपलोड करने के लिए एसेट सेक्शन का उपयोग करें।",
+
+ "Record Screen": "स्क्रीन रिकॉर्ड करें",
+ "Record": "रिकॉर्ड करें",
+ "Stop Recording": "रिकॉर्डिंग बंद करें",
+
+ "Introduce the model .pth path": "मॉडल .pth पथ का परिचय दें",
+ "See Model Information": "मॉडल की जानकारी देखें",
+
+ "## Download Model": "## मॉडल डाउनलोड करें",
+ "Model Link": "मॉडल लिंक",
+ "Introduce the model link": "मॉडल लिंक का परिचय दें",
+ "Download Model": "मॉडल डाउनलोड करें",
+ "## Drop files": "## ड्रॉप फ़ाइलें",
+ "Drag your .pth file and .index file into this space. Drag one and then the other.": "अपनी .pth फ़ाइल और .index फ़ाइल को इस स्थान पर ड्रैग करें। एक को ड्रैग करें और फिर दूसरे को ड्रैग करें।",
+ "## Search Model": "## मॉडल खोजें",
+ "Search": "खोज",
+ "Introduce the model name to search.": "खोज करने के लिए मॉडल का नाम बताएं।",
+ "We couldn't find models by that name.": "हमें उस नाम से मॉडल नहीं मिला।",
+
+ "TTS Voices": "TTS Voices",
+ "Select the TTS voice to use for the conversion.": "रूपांतरण के लिए उपयोग करने के लिए TTS वॉयस का चयन करें।",
+ "Text to Synthesize": "Text to Synthesize",
+ "Enter the text to synthesize.": "संश्लेषित करने के लिए पाठ दर्ज करें।",
+ "Or you can upload a .txt file": "या आप एक .txt फ़ाइल अपलोड कर सकते हैं",
+ "Enter text to synthesize": "संश्लेषित करने के लिए पाठ दर्ज करें",
+ "Output Path for TTS Audio": "TTS ऑडियो के लिए आउटपुट पथ",
+ "Output Path for RVC Audio": "RVC ऑडियो के लिए आउटपुट पथ",
+
+ "Enable Applio integration with Discord presence": "डिस्कॉर्ड उपस्थिति के साथ Applio एकीकरण को सक्षम करें",
+ "It will activate the possibility of displaying the current Applio activity in Discord.": "यह डिस्कॉर्ड में वर्तमान Applio गतिविधि प्रदर्शित करने की संभावना को सक्रिय करेगा।",
+ "Enable Applio integration with applio.org/models using flask": "flask का उपयोग करके applio.org/models के साथ Applio एकीकरण को सक्षम करें",
+ "It will activate the possibility of downloading models with a click from the website.": "यह वेबसाइट से एक क्लिक से मॉडल डाउनलोड करने की संभावना को सक्रिय करेगा।",
+ "Enable fake GPU": "नकली GPU सक्षम करें",
+ "Training is currently unsupported due to the absence of a GPU. To activate the training tab, navigate to the settings tab and enable the 'Fake GPU' option.": "GPU की अनुपस्थिति के कारण प्रशिक्षण वर्तमान में असमर्थित है। प्रशिक्षण टैब को सक्रिय करने के लिए, सेटिंग टैब पर नेविगेट करें और 'नकली GPU' विकल्प को सक्षम करें।",
+ "Activates the train tab. However, please note that this device lacks GPU capabilities, hence training is not supported. This option is only for testing purposes. (This option will restart Applio)": "ट्रेन टैब को सक्रिय करता है। हालांकि, कृपया ध्यान दें कि इस डिवाइस में GPU क्षमताएं नहीं हैं, इसलिए प्रशिक्षण समर्थित नहीं है। यह विकल्प केवल परीक्षण उद्देश्यों के लिए है। (यह विकल्प Applio को पुनरारंभ करेगा)",
+ "Theme": "थीम",
+ "Select the theme you want to use. (Requires restarting Applio)": "वह थीम चुनें जिसका आप उपयोग करना चाहते हैं। (Applio को पुनरारंभ करने की आवश्यकता है)",
+ "Language": "भाषा",
+ "Select the language you want to use. (Requires restarting Applio)": "वह भाषा चुनें जिसका आप उपयोग करना चाहते हैं। (Applio को पुनरारंभ करने की आवश्यकता है)",
+
+ "Plugin Installer": "प्लगइन इंस्टॉलर",
+ "Drag your plugin.zip to install it": "इसे इंस्टॉल करने के लिए अपने plugin.zip को ड्रैग करें",
+
+ "Version Checker": "संस्करण चेकर",
+ "Check which version of Applio is the latest to see if you need to update.": "अद्यतित करने के लिए आपको चाहिए या नहीं यह देखने के लिए जाँच करें कि Applio का कौन सा संस्करण नवीनतम है।",
+ "Check for updates": "अपडेट जांचें"
+}
diff --git a/assets/i18n/languages/hu_HU.json b/assets/i18n/languages/hu_HU.json
new file mode 100644
index 0000000000000000000000000000000000000000..388145cdfcd29dc0e0da420a986f1065347e76a9
--- /dev/null
+++ b/assets/i18n/languages/hu_HU.json
@@ -0,0 +1,175 @@
+{
+ "VITS-based Voice Conversion focused on simplicity, quality and performance.": "A legjobb hangklónozó eszköz, aprólékosan optimalizálva a páratlan teljesítmény, a modularitás és a felhasználóbarát élmény érdekében.",
+ "This section contains some extra utilities that often may be in experimental phases.": "Ez a szakasz néhány további segédprogramot tartalmaz, amelyek gyakran kísérleti fázisban vannak.",
+ "Output Information": "Kimeneti információk",
+ "The output information will be displayed here.": "A kimeneti információk itt jelennek meg.",
+ "Inference": "Következtetés",
+ "Train": "Vonat",
+ "Extra": "Többlet",
+ "Merge Audios": "Hangok egyesítése",
+ "Processing": "Feldolgozás",
+ "Audio Analyzer": "Hangelemző",
+ "Model Information": "Modell információk",
+ "Plugins": "Bővítmények",
+ "Download": "Letöltés",
+ "Report a Bug": "Hiba jelentése",
+ "Settings": "Beállítások",
+ "Preprocess": "Előfeldolgozás",
+ "Model Name": "Modell neve",
+ "Name of the new model.": "Az új modell neve.",
+ "Enter model name": "Adja meg a modell nevét",
+ "Dataset Path": "Adatkészlet elérési útja",
+ "Path to the dataset folder.": "Az adatkészletmappa elérési útja.",
+ "Refresh Datasets": "Adatkészletek frissítése",
+ "Dataset Creator": "Adatkészlet létrehozója",
+ "Dataset Name": "Adatkészlet neve",
+ "Name of the new dataset.": "Az új adatkészlet neve.",
+ "Enter dataset name": "Adja meg az adatkészlet nevét",
+ "Upload Audio Dataset": "Hangadatkészlet feltöltése",
+ "The audio file has been successfully added to the dataset. Please click the preprocess button.": "A hangfájl sikeresen hozzá lett adva az adatkészlethez. Kérem kattintson az előfeldolgozás gombra.",
+ "Enter dataset path": "Adja meg az adatkészlet elérési útját",
+ "Sampling Rate": "Mintavételi arány",
+ "The sampling rate of the audio files.": "Az audiofájlok mintavételi frekvenciája.",
+ "Model Architecture": "RVC verzió",
+ "Version of the model architecture.": "A modell RVC verziója.",
+ "Preprocess Dataset": "Adatkészlet előfeldolgozása",
+ "Extract": "Kivonat",
+ "Hop Length": "Komló hossza",
+ "Denotes the duration it takes for the system to transition to a significant pitch change. Smaller hop lengths require more time for inference but tend to yield higher pitch accuracy.": "Azt az időtartamot jelöli, amely alatt a rendszer jelentős hangmagasság-változásra vált. A kisebb ugráshosszak több időt igényelnek a következtetéshez, de általában nagyobb hangmagasság-pontosságot eredményeznek.",
+ "Batch Size": "Tétel mérete",
+ "It's advisable to align it with the available VRAM of your GPU. A setting of 4 offers improved accuracy but slower processing, while 8 provides faster and standard results.": "Javasoljuk, hogy igazítsa a GPU rendelkezésre álló VRAM-jához. A 4-es beállítás nagyobb pontosságot, de lassabb feldolgozást biztosít, míg a 8-as gyorsabb és szabványos eredményeket biztosít.",
+ "Save Every Epoch": "Mentsd meg minden korszakot",
+ "Determine at how many epochs the model will saved at.": "Határozza meg, hogy hány korszakban menti a modellt.",
+ "Total Epoch": "Teljes korszak",
+ "Specifies the overall quantity of epochs for the model training process.": "A modell betanítási folyamatának epocháinak teljes mennyiségét adja meg.",
+ "Pretrained": "Előre betanított",
+ "Save Only Latest": "Csak a legújabbak mentése",
+ "Enabling this setting will result in the G and D files saving only their most recent versions, effectively conserving storage space.": "A beállítás engedélyezése azt eredményezi, hogy a G és D fájlok csak a legújabb verziójukat mentik, így hatékonyan megtakarítják a tárhelyet.",
+ "Save Every Weights": "Takarítson meg minden súlyt",
+ "This setting enables you to save the weights of the model at the conclusion of each epoch.": "Ez a beállítás lehetővé teszi a modell súlyozásának mentését az egyes korszakok végén.",
+ "Custom Pretrained": "Egyéni előképzett",
+ "Utilizing custom pretrained models can lead to superior results, as selecting the most suitable pretrained models tailored to the specific use case can significantly enhance performance.": "Az egyéni előre betanított modellek használata kiváló eredményekhez vezethet, mivel az adott használati esetre szabott legmegfelelőbb előre betanított modellek kiválasztása jelentősen javíthatja a teljesítményt.",
+ "Upload Pretrained Model": "Előre betanított modell feltöltése",
+ "Refresh Custom Pretraineds": "Egyéni előképzetek frissítése",
+ "Pretrained Custom Settings": "Előre betanított egyéni beállítások",
+ "The file you dropped is not a valid pretrained file. Please try again.": "Az eldobott fájl nem érvényes előre betanított fájl. Kérjük, próbálja újra.",
+ "Click the refresh button to see the pretrained file in the dropdown menu.": "Kattintson a frissítés gombra az előre betanított fájl megjelenítéséhez a legördülő menüben.",
+ "Pretrained G Path": "Egyéni előképzett G",
+ "Pretrained D Path": "Egyéni előképzett D",
+ "GPU Settings": "GPU-beállítások",
+ "Sets advanced GPU settings, recommended for users with better GPU architecture.": "Speciális GPU-beállításokat állít be, amelyek a jobb GPU-architektúrával rendelkező felhasználók számára ajánlottak.",
+ "GPU Custom Settings": "GPU egyéni beállítások",
+ "GPU Number": "GPU-szám",
+ "0 to ∞ separated by -": "0-tól ∞-ig - választja el",
+ "GPU Information": "GPU-információk",
+ "Pitch Guidance": "Pitch útmutatás",
+ "By employing pitch guidance, it becomes feasible to mirror the intonation of the original voice, including its pitch. This feature is particularly valuable for singing and other scenarios where preserving the original melody or pitch pattern is essential.": "A hangmagasság-útmutatás alkalmazásával megvalósíthatóvá válik az eredeti hang intonációjának tükrözése, beleértve annak hangmagasságát is. Ez a funkció különösen értékes énekléshez és más forgatókönyvekhez, ahol az eredeti dallam vagy hangmagasság minta megőrzése elengedhetetlen.",
+ "Utilize pretrained models when training your own. This approach reduces training duration and enhances overall quality.": "Használja az előre betanított modelleket a saját betanításakor. Ez a megközelítés csökkenti a képzés időtartamát és javítja az általános minőséget.",
+ "Extract Features": "Jellemzők kivonása",
+ "Start Training": "Kezdje el az edzést",
+ "Generate Index": "Index létrehozása",
+ "Voice Model": "Hangmodell",
+ "Select the voice model to use for the conversion.": "Válassza ki az átalakításhoz használni kívánt hangmodellt.",
+ "Index File": "Index fájl",
+ "Select the index file to use for the conversion.": "Válassza ki az átalakításhoz használni kívánt indexfájlt.",
+ "Refresh": "Felfrissít",
+ "Unload Voice": "Hang eltávolítása",
+ "Single": "Nőtlen",
+ "Upload Audio": "Hang feltöltése",
+ "Select Audio": "Válassza az Audio lehetőséget",
+ "Select the audio to convert.": "Válassza ki a konvertálni kívánt hangot.",
+ "Advanced Settings": "Speciális beállítások",
+ "Clear Outputs (Deletes all audios in assets/audios)": "Kimenetek törlése (Törli az összes hangot az eszközökből/hangokból)",
+ "Custom Output Path": "Egyéni kimeneti útvonal",
+ "Output Path": "Kimeneti útvonal",
+ "The path where the output audio will be saved, by default in assets/audios/output.wav": "Az elérési út, ahová a kimeneti hang mentésre kerül, alapértelmezés szerint az eszközökben / hangokban / output.wav",
+ "Split Audio": "Osztott hang",
+ "Split the audio into chunks for inference to obtain better results in some cases.": "Ossza fel a hangot darabokra a következtetéshez, hogy bizonyos esetekben jobb eredményeket érjen el.",
+ "Autotune": "Automatikus hangolás",
+ "Apply a soft autotune to your inferences, recommended for singing conversions.": "Alkalmazzon lágy automatikus hangolást a következtetésekre, ami az énekkonverziókhoz ajánlott.",
+ "Clean Audio": "Tiszta hangzás",
+ "Clean your audio output using noise detection algorithms, recommended for speaking audios.": "Tisztítsa meg a hangkimenetet zajérzékelő algoritmusokkal, amelyek a hangok beszédéhez ajánlottak.",
+ "Clean Strength": "Tiszta szilárdság",
+ "Set the clean-up level to the audio you want, the more you increase it the more it will clean up, but it is possible that the audio will be more compressed.": "Állítsa be a tisztítási szintet a kívánt hangra, minél jobban növeli, annál jobban megtisztítja, de lehetséges, hogy a hang tömörítettebb lesz.",
+ "Pitch": "Hangmagasság",
+ "Set the pitch of the audio, the higher the value, the higher the pitch.": "Állítsa be a hang hangmagasságát, minél magasabb az érték, annál magasabb a hangmagasság.",
+ "Filter Radius": "Szűrési sugár",
+ "If the number is greater than or equal to three, employing median filtering on the collected tone results has the potential to decrease respiration.": "Ha a szám nagyobb vagy egyenlő hárommal, az összegyűjtött tónuseredmények medián szűrésének alkalmazása csökkentheti a légzést.",
+ "Search Feature Ratio": "Keresési funkciók aránya",
+ "Influence exerted by the index file; a higher value corresponds to greater influence. However, opting for lower values can help mitigate artifacts present in the audio.": "Az indexfájl által gyakorolt hatás; A magasabb érték nagyobb befolyásnak felel meg. Az alacsonyabb értékek választása azonban segíthet enyhíteni a hangban jelen lévő műtermékeket.",
+ "Volume Envelope": "Térfogat boríték",
+ "Substitute or blend with the volume envelope of the output. The closer the ratio is to 1, the more the output envelope is employed.": "Helyettesítse vagy keverje össze a kimenet térfogatburkológörbéjét. Minél közelebb van az arány az 1-hez, annál nagyobb a kimeneti burkológörbe.",
+ "Protect Voiceless Consonants": "Zöngétlen mássalhangzók védelme",
+ "Safeguard distinct consonants and breathing sounds to prevent electro-acoustic tearing and other artifacts. Pulling the parameter to its maximum value of 0.5 offers comprehensive protection. However, reducing this value might decrease the extent of protection while potentially mitigating the indexing effect.": "Védje a különálló mássalhangzókat és légzési hangokat, hogy megakadályozza az elektroakusztikus szakadást és más műtermékeket. A paraméter maximális 0,5-ös értékre való lekérése átfogó védelmet nyújt. Ennek az értéknek a csökkentése azonban csökkentheti a védelem mértékét, miközben potenciálisan enyhítheti az indexelési hatást.",
+ "Pitch extraction algorithm": "Pitch extrakciós algoritmus",
+ "Pitch extraction algorithm to use for the audio conversion. The default algorithm is rmvpe, which is recommended for most cases.": "Hangmagasság-kinyerési algoritmus az audio konvertáláshoz. Az alapértelmezett algoritmus az rmvpe, amely a legtöbb esetben ajánlott.",
+ "Convert": "Megtérít",
+ "Export Audio": "Hang exportálása",
+ "Batch": "Halom",
+ "Input Folder": "Bemeneti mappa",
+ "Select the folder containing the audios to convert.": "Válassza ki a konvertálni kívánt hangokat tartalmazó mappát.",
+ "Enter input path": "Adja meg a bemeneti útvonalat",
+ "Output Folder": "Kimeneti mappa",
+ "Select the folder where the output audios will be saved.": "Válassza ki azt a mappát, ahová a kimeneti hangokat menteni kívánja.",
+ "Enter output path": "Adja meg a kimeneti útvonalat",
+ "Get information about the audio": "Információk lekérése a hangról",
+ "Information about the audio file": "Információ a hangfájlról",
+ "Waiting for information...": "Információra várva...",
+ "## Voice Blender": "## Hangos turmixgép",
+ "Select two voice models, set your desired blend percentage, and blend them into an entirely new voice.": "Válasszon ki két hangmodellt, állítsa be a kívánt keverési százalékot, és keverje össze őket egy teljesen új hangszínbe.",
+ "Voice Blender": "Hangos turmixgép",
+ "Drag and drop your model here": "Húzza ide a modellt",
+ "You can also use a custom path.": "Egyéni elérési utat is használhat.",
+ "Blend Ratio": "Keverési arány",
+ "Adjusting the position more towards one side or the other will make the model more similar to the first or second.": "Ha a pozíciót jobban az egyik vagy a másik oldalra állítja, a modell jobban hasonlít az elsőhöz vagy a másodikhoz.",
+ "Fusion": "Fúzió",
+ "Path to Model": "A modell elérési útja",
+ "Enter path to model": "Adja meg a modell elérési útját",
+ "Model information to be placed": "Az elhelyezendő modellinformációk",
+ "Inroduce the model information": "A modellinformációk bemutatása",
+ "The information to be placed in the model (You can leave it blank or put anything).": "A modellben elhelyezendő információk (üresen hagyhatja, vagy bármit betehet).",
+ "View model information": "Modellinformációk megtekintése",
+ "Introduce the model pth path": "A modell pth elérési útjának bemutatása",
+ "View": "Nézet",
+ "Model extraction": "Modell kinyerése",
+ "Model conversion": "Modell átalakítás",
+ "Pth file": "Pth fájl",
+ "Output of the pth file": "A pth fájl kimenete",
+ "# How to Report an Issue on GitHub": "# Hogyan jelenthet problémát a GitHubon",
+ "1. Click on the 'Record Screen' button below to start recording the issue you are experiencing.": "1. Kattintson az alábbi \"Felvétel képernyő\" gombra a tapasztalt probléma rögzítésének megkezdéséhez.",
+ "2. Once you have finished recording the issue, click on the 'Stop Recording' button (the same button, but the label changes depending on whether you are actively recording or not).": "2. Miután befejezte a probléma rögzítését, kattintson a \"Felvétel leállítása\" gombra (ugyanaz a gomb, de a címke attól függően változik, hogy aktívan rögzít-e vagy sem).",
+ "3. Go to [GitHub Issues](https://github.com/IAHispano/Applio/issues) and click on the 'New Issue' button.": "3. Lépjen a [GitHub-problémák](https://github.com/IAHispano/Applio/issues) oldalra, és kattintson az \"Új probléma\" gombra.",
+ "4. Complete the provided issue template, ensuring to include details as needed, and utilize the assets section to upload the recorded file from the previous step.": "4. Töltse ki a megadott problémasablont, ügyelve arra, hogy szükség szerint tartalmazza a részleteket, és használja az eszközök szakaszt az előző lépésből rögzített fájl feltöltéséhez.",
+ "Record Screen": "Felvétel képernyő",
+ "Record": "Rekord",
+ "Stop Recording": "Felvétel leállítása",
+ "Introduce the model .pth path": "A modell .pth elérési útjának bemutatása",
+ "See Model Information": "Modellinformációk megtekintése",
+ "## Download Model": "## Modell letöltése",
+ "Model Link": "Modell link",
+ "Introduce the model link": "A modellhivatkozás bemutatása",
+ "Download Model": "Modell letöltése",
+ "## Drop files": "## Dobja el a fájlokat",
+ "Drag your .pth file and .index file into this space. Drag one and then the other.": "Húzza a .pth és .index fájlt erre a helyre. Húzza az egyiket, majd a másikat.",
+ "TTS Voices": "TTS-hangok",
+ "Select the TTS voice to use for the conversion.": "Válassza ki az átalakításhoz használni kívánt TTS-hangot.",
+ "Text to Synthesize": "Szintetizálandó szöveg",
+ "Enter the text to synthesize.": "Írja be a szintetizálni kívánt szöveget.",
+ "Or you can upload a .txt file": "Vagy feltölthet egy .txt fájlt",
+ "Enter text to synthesize": "Írja be a szintetizálni kívánt szöveget",
+ "Output Path for TTS Audio": "A TTS Audio kimeneti útvonala",
+ "Output Path for RVC Audio": "Az RVC Audio kimeneti útvonala",
+ "Enable Applio integration with Discord presence": "Engedélyezze az Applio integrációját a Discord jelenléttel",
+ "It will activate the possibility of displaying the current Applio activity in Discord.": "Aktiválja az aktuális Applio tevékenység megjelenítésének lehetőségét a Discordban.",
+ "Enable Applio integration with applio.org/models using flask": "Az Applio és a applio.org/models integrációjának engedélyezése lombik használatával",
+ "It will activate the possibility of downloading models with a click from the website.": "Aktiválja a modellek letöltésének lehetőségét egy kattintással a weboldalról.",
+ "Theme": "Téma",
+ "Select the theme you want to use. (Requires restarting Applio)": "Válassza ki a használni kívánt témát. (Az Applio újraindítását igényli)",
+ "Language": "Nyelv",
+ "Select the language you want to use. (Requires restarting Applio)": "Válassza ki a használni kívánt nyelvet. (Az Applio újraindítását igényli)",
+ "Plugin Installer": "Bővítmény telepítő",
+ "Drag your plugin.zip to install it": "Húzza a plugin.zip a telepítéshez",
+ "Version Checker": "Verzióellenőrző",
+ "Check which version of Applio is the latest to see if you need to update.": "Ellenőrizze, hogy az Applio melyik verziója a legújabb, hogy lássa, frissítenie kell-e.",
+ "Check for updates": "Frissítések keresése"
+}
\ No newline at end of file
diff --git a/assets/i18n/languages/id_ID.json b/assets/i18n/languages/id_ID.json
new file mode 100644
index 0000000000000000000000000000000000000000..1d7548c01602869c0f826c10ea5718650f38ba63
--- /dev/null
+++ b/assets/i18n/languages/id_ID.json
@@ -0,0 +1,148 @@
+{
+ "VITS-based Voice Conversion focused on simplicity, quality and performance.": "Alat kloning suara terbaik, dioptimalkan secara cermat untuk kekuatan tak tertandingi, modularitas, dan pengalaman ramah pengguna.",
+ "This section contains some extra utilities that often may be in experimental phases.": "Bagian ini berisi beberapa utilitas tambahan yang mungkin sering berada dalam tahap percobaan.",
+ "Output Information": "informasi keluaran",
+
+ "Inference": "Inference",
+ "Train": "training model",
+ "Extra": "bonus",
+ "Merge Audios": "Gabungkan Audio",
+ "Processing": "Pengolahan",
+ "Audio Analyzer": "Penganalisis Audio",
+ "Model Information": "informasi",
+ "Plugins": "Plugin",
+ "Download": "Unduh",
+ "Report a Bug": "Laporkan Bug",
+ "Settings": "Pengaturan",
+
+ "Preprocess": "Proses awal",
+ "Model Name": "Nama model",
+ "Enter model name": "masukkan nama model",
+ "Dataset Path": "Jalur Kumpulan Data",
+ "Dataset Creator": "Pembuat Kumpulan Data/dataset",
+ "Dataset Name": "Nama kumpulan data",
+ "Enter dataset name": "Masukkan nama kumpulan data",
+ "Upload Audio Dataset": "Unggah audio kumpulan data",
+ "The audio file has been successfully added to the dataset. Please click the preprocess button.": "Berkas audio telah berhasil ditambahkan ke dataset. Silakan klik tombol praproses.",
+ "Enter dataset path": "Masukkan jalur kumpulan data",
+ "Sampling Rate": "Tingkat Pengambilan Sampel",
+ "Model Architecture": "Versi RVC",
+ "Preprocess Dataset": "Kumpulan Data Praproses",
+
+ "Extract": "Ekstrak",
+ "Hop Length": "Panjang Lompatan",
+ "Batch Size": "Ukuran Batch",
+ "Save Every Epoch": "Simpan Setiap Epoch",
+ "Total Epoch": "Total Epoch",
+ "Pretrained": "Terlatih sebelumnya",
+ "Save Only Latest": "Simpan Hanya Yang Terbaru",
+ "Save Every Weights": "Simpan Setiap Weights/beban",
+ "Custom Pretrained": "Terlatih Khusus",
+ "Upload Pretrained Model": "Unggah Model yang Telah Dilatih sebelumnya",
+ "Pretrained Custom Settings": "Pengaturan Kustom yang Telah Dilatih Sebelumnya",
+ "The file you dropped is not a valid pretrained file. Please try again.": "File yang Anda jatuhkan bukan file terlatih yang valid. Silakan coba lagi.",
+ "Click the refresh button to see the pretrained file in the dropdown menu.": "Klik tombol segarkan untuk melihat file yang telah dilatih sebelumnya di menu tarik-turun.",
+ "Pretrained G Path": "Terlatih Khusus G",
+ "Pretrained D Path": "Terlatih Khusus D",
+ "GPU Settings": "Penggaturan GPU",
+ "GPU Custom Settings": "Penggaturan GPU khusus",
+ "GPU Number": "Angka GPU",
+ "0 to ∞ separated by -": "0 to ∞ dipisahkan oleh -",
+ "GPU Information": "Informasi GPU",
+ "Pitch Guidance": "Panduan Lapangan/pitch",
+ "Extract Features": "Ekstrak Fitur",
+
+ "Start Training": "mulai Training",
+ "Generate Index": "Menghasilkan Index",
+
+ "Voice Model": "Model Suara",
+ "Index File": "Berkas Indeks",
+ "Refresh": "Muat ulang",
+ "Unload Voice": "Bongkar Suara",
+
+ "Single": "Sendiri",
+ "Upload Audio": "Ungah Audio",
+ "Select Audio": "Pilh Audio",
+ "Advanced Settings": "Pengaturan lanjutan",
+ "Clear Outputs (Deletes all audios in assets/audios)": "Hapus Output (Menghapus semua audio di assets/audio)",
+ "Custom Output Path": "Kustom Jalur keluaran",
+ "Output Path": "Jalur keluaran",
+ "Split Audio": "pisah audio",
+ "Autotune": "Autotune",
+ "Pitch": "Nada",
+ "If >=3: apply median filtering to the harvested pitch results. The value represents the filter radius and can reduce breathiness": "If >=3: terapkan pemfilteran median pada hasil pitch yang dipanen. Nilai tersebut mewakili radius filter dan dapat mengurangi sesak napas",
+ "Search Feature Ratio": "Rasio Fitur Pencarian",
+ "Pitch extraction algorithm": "Algoritma ekstraksi nada",
+ "Convert": "Convert",
+ "Export Audio": "Export Audio",
+
+ "Batch": "Batch",
+ "Input Folder": "Input berkas",
+ "Enter input path": "Masukkan jalur masukan",
+ "Output Folder": "Folder Keluaran",
+ "Enter output path": "Masukkan jalur keluaran",
+
+ "Get information about the audio": "Dapatkan informasi tentang audio",
+ "Information about the audio file": "Informasi tentang file audio",
+ "Waiting for information...": "Menunggu informasi...",
+
+ "Model fusion": "Penggabungan model",
+ "Weight for Model A": "Weight untuk model A",
+ "Whether the model has pitch guidance": "Apakah model memiliki panduan nada",
+ "Model architecture version": "Versi arsitektur model",
+ "Path to Model A": "Jalan Menuju Model A",
+ "Path to Model B": "Jalan Menuju Model B",
+ "Path to model": "Jalan Menuju Model",
+ "Model information to be placed": "Informasi model yang akan ditempatkan",
+ "Fusion": "Fusi",
+
+ "Modify model information": "Ubah informasi model",
+ "Path to Model": "Jalan Menuju Model",
+ "Model information to be modified": "Informasi model yang akan dimodifikasi",
+ "Save file name": "Simpan nama file",
+ "Modify": "Ubah",
+
+ "View model information": "tampilkan informasi model",
+ "View": "tampilakan",
+ "Model extraction": "Ekstraksi model",
+ "Model conversion": "Konversi model",
+ "Pth file": "file Pth",
+ "Output of the pth file": "Keluaran dari file pth",
+
+ "# How to Report an Issue on GitHub": "# Cara Melaporkan Masalah di GitHub",
+ "1. Click on the 'Record Screen' button below to start recording the issue you are experiencing.": "1. Klik tombol 'Rekam Layar' di bawah untuk mulai merekam masalah yang Anda alami.",
+ "2. Once you have finished recording the issue, click on the 'Stop Recording' button (the same button, but the label changes depending on whether you are actively recording or not).": "2. Once you have finished recording the issue, click on the 'Stop Recording' button (the same button, but the label changes depending on whether you are actively recording or not).",
+ "3. Go to [GitHub Issues](https://github.com/IAHispano/Applio/issues) and click on the 'New Issue' button.": "3. pergi ke [GitHub Issues](https://github.com/IAHispano/Applio/issues) dan klik tombol 'Masalah Baru'.",
+ "4. Complete the provided issue template, ensuring to include details as needed, and utilize the assets section to upload the recorded file from the previous step.": "4. Lengkapi templat masalah yang disediakan, pastikan untuk menyertakan detail sesuai kebutuhan, dan manfaatkan bagian aset untuk mengunggah file rekaman dari langkah sebelumnya.",
+
+ "Record Screen": "Rekam layar",
+ "Record": "Rekam",
+ "Stop Recording": "Berhenti merekam",
+
+ "Introduce the model .pth path": "Perkenalkan jalur model .pth",
+ "See Model Information": "Lihat informasi model",
+
+ "## Download Model": "## Unduh Model",
+ "Model Link": "Tautan model",
+ "Introduce the model link": "Perkenalkan tautan model",
+ "Download Model": "Unduh Model",
+ "## Drop files": "## masukkan file ",
+ "Drag your .pth file and .index file into this space. Drag one and then the other.": "Seret file .pth dan file .index ke dalam ruang ini. Seret yang pertama, lalu yang lainnya.",
+
+ "TTS Voices": "suara TTS",
+ "Text to Synthesize": "Text ke disintesis",
+ "Or you can upload a .txt file": "Atau Anda dapat mengunggah file .txt",
+ "Enter text to synthesize": "Masukkan teks untuk disintesis",
+ "Output Path for TTS Audio": "Jalur Keluaran untuk Audio TTS",
+ "Output Path for RVC Audio": "Jalur Keluaran untuk Audio RVC",
+
+ "Enable Applio integration with Discord presence": "Aktifkan integrasi Applio dengan kehadiran Discord",
+ "Enable Applio integration with applio.org/models using flask": "Aktifkan integrasi Applio dengan applio.org/models menggunakan flask",
+ "Theme": "Tema (Diperlukan mulai ulang)",
+
+ "Plugin Installer": "Penginstal Plugin",
+ "Drag your plugin.zip to install it": "Seret plugin.zip Anda untuk menginstalnya",
+
+ "Version Checker": "Pemeriksa versi",
+ "Check for updates": "Periksa pembaruan"
+}
diff --git a/assets/i18n/languages/it_IT.json b/assets/i18n/languages/it_IT.json
new file mode 100644
index 0000000000000000000000000000000000000000..a218d3c9b13df86ca08f536328151bb79617c0c8
--- /dev/null
+++ b/assets/i18n/languages/it_IT.json
@@ -0,0 +1,175 @@
+{
+ "VITS-based Voice Conversion focused on simplicity, quality and performance.": "Strumento di clonazione vocale definitivo, meticolosamente ottimizzato per potenza, modularità ed esperienza utente senza rivali.",
+ "This section contains some extra utilities that often may be in experimental phases.": "Questa sezione contiene alcune utilità aggiuntive che spesso possono essere in fase sperimentale.",
+ "Output Information": "Informazioni sull'output",
+ "The output information will be displayed here.": "Qui verranno visualizzate le informazioni sull'output.",
+ "Inference": "Inferenza",
+ "Train": "Addestramento",
+ "Extra": "Extra",
+ "Merge Audios": "Unisci audio",
+ "Processing": "Elaborazione",
+ "Audio Analyzer": "Analizzatore audio",
+ "Model Information": "Informazioni sul modello",
+ "Plugins": "Plugin",
+ "Download": "Scarica",
+ "Report a Bug": "Segnala un bug",
+ "Settings": "Impostazioni",
+ "Preprocess": "Pre-elaborazione",
+ "Model Name": "Nome del modello",
+ "Name of the new model.": "Nome del nuovo modello.",
+ "Enter model name": "Inserisci il nome del modello",
+ "Dataset Path": "Percorso del dataset",
+ "Path to the dataset folder.": "Percorso della cartella del dataset.",
+ "Refresh Datasets": "Aggiorna dataset",
+ "Dataset Creator": "Creatore di dataset",
+ "Dataset Name": "Nome del dataset",
+ "Name of the new dataset.": "Nome del nuovo dataset.",
+ "Enter dataset name": "Inserisci il nome del dataset",
+ "Upload Audio Dataset": "Carica dataset audio",
+ "The audio file has been successfully added to the dataset. Please click the preprocess button.": "Il file audio è stato aggiunto correttamente al dataset. Fare clic sul pulsante di pre-elaborazione.",
+ "Enter dataset path": "Inserisci il percorso del dataset",
+ "Sampling Rate": "Frequenza di campionamento",
+ "The sampling rate of the audio files.": "La frequenza di campionamento dei file audio.",
+ "Model Architecture": "Versione RVC",
+ "Version of the model architecture.": "La versione RVC del modello.",
+ "Preprocess Dataset": "Pre-elabora dataset",
+ "Extract": "Estrai",
+ "Hop Length": "Lunghezza del salto",
+ "Denotes the duration it takes for the system to transition to a significant pitch change. Smaller hop lengths require more time for inference but tend to yield higher pitch accuracy.": "Indica il tempo necessario affinché il sistema passi a un cambiamento di intonazione significativo. Lunghezze di salto più piccole richiedono più tempo per l'inferenza, ma tendono a produrre una maggiore precisione dell'intonazione.",
+ "Batch Size": "Dimensione del batch",
+ "It's advisable to align it with the available VRAM of your GPU. A setting of 4 offers improved accuracy but slower processing, while 8 provides faster and standard results.": "Si consiglia di allinearlo con la VRAM disponibile della GPU. Un'impostazione di 4 offre una maggiore precisione ma un'elaborazione più lenta, mentre 8 fornisce risultati più rapidi e standard.",
+ "Save Every Epoch": "Salva ogni epoca",
+ "Determine at how many epochs the model will saved at.": "Determina dopo quante epoche il modello verrà salvato.",
+ "Total Epoch": "Epoche totali",
+ "Specifies the overall quantity of epochs for the model training process.": "Specifica la quantità complessiva di epoche per il processo di addestramento del modello.",
+ "Pretrained": "Pre-addestrato",
+ "Save Only Latest": "Salva solo l'ultima versione",
+ "Enabling this setting will result in the G and D files saving only their most recent versions, effectively conserving storage space.": "Abilitando questa impostazione, i file G e D salveranno solo le versioni più recenti, risparmiando spazio di archiviazione.",
+ "Save Every Weights": "Salva tutti i pesi",
+ "This setting enables you to save the weights of the model at the conclusion of each epoch.": "Questa impostazione consente di salvare i pesi del modello alla fine di ogni epoca.",
+ "Custom Pretrained": "Pre-addestrato personalizzato",
+ "Utilizing custom pretrained models can lead to superior results, as selecting the most suitable pretrained models tailored to the specific use case can significantly enhance performance.": "L'utilizzo di modelli pre-addestrati personalizzati può portare a risultati superiori, poiché la selezione dei modelli pre-addestrati più adatti al caso d'uso specifico può migliorare significativamente le prestazioni.",
+ "Upload Pretrained Model": "Carica modello pre-addestrato",
+ "Refresh Custom Pretraineds": "Aggiorna pre-addestrati personalizzati",
+ "Pretrained Custom Settings": "Impostazioni personalizzate pre-addestrate",
+ "The file you dropped is not a valid pretrained file. Please try again.": "Il file caricato non è un file pre-addestrato valido. Si prega di riprovare.",
+ "Click the refresh button to see the pretrained file in the dropdown menu.": "Fare clic sul pulsante Aggiorna per visualizzare il file pre-addestrato nel menu a tendina.",
+ "Pretrained G Path": "Percorso G pre-addestrato personalizzato",
+ "Pretrained D Path": "Percorso D pre-addestrato personalizzato",
+ "GPU Settings": "Impostazioni GPU",
+ "Sets advanced GPU settings, recommended for users with better GPU architecture.": "Imposta le impostazioni avanzate della GPU, consigliate per gli utenti con una migliore architettura GPU.",
+ "GPU Custom Settings": "Impostazioni personalizzate GPU",
+ "GPU Number": "Numero GPU",
+ "0 to ∞ separated by -": "Da 0 a ∞ separati da -",
+ "GPU Information": "Informazioni sulla GPU",
+ "Pitch Guidance": "Guida all'intonazione",
+ "By employing pitch guidance, it becomes feasible to mirror the intonation of the original voice, including its pitch. This feature is particularly valuable for singing and other scenarios where preserving the original melody or pitch pattern is essential.": "Utilizzando la guida all'intonazione, diventa possibile rispecchiare l'intonazione della voce originale, compresa la sua altezza. Questa funzione è particolarmente utile per il canto e altri scenari in cui è essenziale preservare la melodia o il modello di intonazione originale.",
+ "Utilize pretrained models when training your own. This approach reduces training duration and enhances overall quality.": "Utilizza modelli pre-addestrati quando addestri i tuoi. Questo approccio riduce la durata dell'addestramento e migliora la qualità complessiva.",
+ "Extract Features": "Estrai caratteristiche",
+ "Start Training": "Inizia l'addestramento",
+ "Generate Index": "Genera indice",
+ "Voice Model": "Modello vocale",
+ "Select the voice model to use for the conversion.": "Seleziona il modello vocale da utilizzare per la conversione.",
+ "Index File": "File indice",
+ "Select the index file to use for the conversion.": "Seleziona il file indice da utilizzare per la conversione.",
+ "Refresh": "Aggiorna",
+ "Unload Voice": "Scarica voce",
+ "Single": "Singolo",
+ "Upload Audio": "Carica audio",
+ "Select Audio": "Seleziona audio",
+ "Select the audio to convert.": "Seleziona l'audio da convertire.",
+ "Advanced Settings": "Impostazioni avanzate",
+ "Clear Outputs (Deletes all audios in assets/audios)": "Cancella output (elimina tutti gli audio in assets/audios)",
+ "Custom Output Path": "Percorso di output personalizzato",
+ "Output Path": "Percorso di output",
+ "The path where the output audio will be saved, by default in assets/audios/output.wav": "Il percorso in cui verrà salvato l'audio di output, per impostazione predefinita in assets/audios/output.wav",
+ "Split Audio": "Dividi audio",
+ "Split the audio into chunks for inference to obtain better results in some cases.": "Dividi l'audio in blocchi per l'inferenza per ottenere risultati migliori in alcuni casi.",
+ "Autotune": "Autotune",
+ "Apply a soft autotune to your inferences, recommended for singing conversions.": "Applica un soft autotune alle tue inferenze, consigliato per le conversioni di canto.",
+ "Clean Audio": "Pulisci audio",
+ "Clean your audio output using noise detection algorithms, recommended for speaking audios.": "Pulisci l'output audio utilizzando algoritmi di rilevamento del rumore, consigliato per gli audio parlati.",
+ "Clean Strength": "Intensità di pulizia",
+ "Set the clean-up level to the audio you want, the more you increase it the more it will clean up, but it is possible that the audio will be more compressed.": "Imposta il livello di pulizia dell'audio desiderato, più lo aumenti più si pulirà, ma è possibile che l'audio risulti più compresso.",
+ "Pitch": "Intonazione",
+ "Set the pitch of the audio, the higher the value, the higher the pitch.": "Imposta l'intonazione dell'audio, più alto è il valore, più alta sarà l'intonazione.",
+ "Filter Radius": "Raggio del filtro",
+ "If the number is greater than or equal to three, employing median filtering on the collected tone results has the potential to decrease respiration.": "Se il numero è maggiore o uguale a tre, l'impiego del filtraggio mediano sui risultati del tono raccolto ha il potenziale per ridurre la respirazione.",
+ "Search Feature Ratio": "Rapporto di ricerca delle caratteristiche",
+ "Influence exerted by the index file; a higher value corresponds to greater influence. However, opting for lower values can help mitigate artifacts present in the audio.": "Influenza esercitata dal file indice; un valore più alto corrisponde a una maggiore influenza. Tuttavia, optare per valori più bassi può aiutare a mitigare gli artefatti presenti nell'audio.",
+ "Volume Envelope": "Inviluppo del volume",
+ "Substitute or blend with the volume envelope of the output. The closer the ratio is to 1, the more the output envelope is employed.": "Sostituisci o miscela con l'inviluppo del volume dell'output. Più il rapporto è vicino a 1, più viene impiegato l'inviluppo di uscita.",
+ "Protect Voiceless Consonants": "Proteggi consonanti sorde",
+ "Safeguard distinct consonants and breathing sounds to prevent electro-acoustic tearing and other artifacts. Pulling the parameter to its maximum value of 0.5 offers comprehensive protection. However, reducing this value might decrease the extent of protection while potentially mitigating the indexing effect.": "Salvaguarda le consonanti distinte e i suoni respiratori per prevenire distorsioni elettroacustiche e altri artefatti. Portando il parametro al valore massimo di 0,5 si ottiene una protezione completa. Tuttavia, la riduzione di questo valore potrebbe diminuire l'estensione della protezione, riducendo al contempo l'effetto di indicizzazione.",
+ "Pitch extraction algorithm": "Algoritmo di estrazione dell'intonazione",
+ "Pitch extraction algorithm to use for the audio conversion. The default algorithm is rmvpe, which is recommended for most cases.": "Algoritmo di estrazione dell'intonazione da utilizzare per la conversione audio. L'algoritmo predefinito è rmvpe, consigliato per la maggior parte dei casi.",
+ "Convert": "Converti",
+ "Export Audio": "Esporta audio",
+ "Batch": "Batch",
+ "Input Folder": "Cartella di input",
+ "Select the folder containing the audios to convert.": "Seleziona la cartella contenente gli audio da convertire.",
+ "Enter input path": "Inserisci il percorso di input",
+ "Output Folder": "Cartella di output",
+ "Select the folder where the output audios will be saved.": "Seleziona la cartella in cui verranno salvati gli audio di output.",
+ "Enter output path": "Inserisci il percorso di output",
+ "Get information about the audio": "Ottieni informazioni sull'audio",
+ "Information about the audio file": "Informazioni sul file audio",
+ "Waiting for information...": "In attesa di informazioni...",
+ "## Voice Blender": "## Miscelatore vocale",
+ "Select two voice models, set your desired blend percentage, and blend them into an entirely new voice.": "Seleziona due modelli vocali, imposta la percentuale di fusione desiderata e uniscili in una voce completamente nuova.",
+ "Voice Blender": "Miscelatore vocale",
+ "Drag and drop your model here": "Trascina e rilascia qui il tuo modello",
+ "You can also use a custom path.": "Puoi anche utilizzare un percorso personalizzato.",
+ "Blend Ratio": "Rapporto di miscelazione",
+ "Adjusting the position more towards one side or the other will make the model more similar to the first or second.": "Regolando la posizione più verso un lato o l'altro si renderà il modello più simile al primo o al secondo.",
+ "Fusion": "Fusione",
+ "Path to Model": "Percorso del modello",
+ "Enter path to model": "Inserisci il percorso del modello",
+ "Model information to be placed": "Informazioni sul modello da inserire",
+ "Inroduce the model information": "Introduci le informazioni sul modello",
+ "The information to be placed in the model (You can leave it blank or put anything).": "Le informazioni da inserire nel modello (puoi lasciarle vuote o inserire qualsiasi cosa).",
+ "View model information": "Visualizza le informazioni sul modello",
+ "Introduce the model pth path": "Introduci il percorso pth del modello",
+ "View": "Visualizza",
+ "Model extraction": "Estrazione del modello",
+ "Model conversion": "Conversione del modello",
+ "Pth file": "File Pth",
+ "Output of the pth file": "Output del file pth",
+ "# How to Report an Issue on GitHub": "# Come segnalare un problema su GitHub",
+ "1. Click on the 'Record Screen' button below to start recording the issue you are experiencing.": "1. Fare clic sul pulsante \"Registra schermo\" in basso per avviare la registrazione del problema riscontrato.",
+ "2. Once you have finished recording the issue, click on the 'Stop Recording' button (the same button, but the label changes depending on whether you are actively recording or not).": "2. Una volta terminata la registrazione del problema, fai clic sul pulsante \"Interrompi registrazione\" (lo stesso pulsante, ma l'etichetta cambia a seconda che tu stia registrando attivamente o meno).",
+ "3. Go to [GitHub Issues](https://github.com/IAHispano/Applio/issues) and click on the 'New Issue' button.": "3. Vai su [GitHub Issues](https://github.com/IAHispano/Applio/issues) e fai clic sul pulsante \"Nuovo problema\".",
+ "4. Complete the provided issue template, ensuring to include details as needed, and utilize the assets section to upload the recorded file from the previous step.": "4. Completa il modello di problema fornito, assicurandoti di includere i dettagli necessari e utilizza la sezione delle risorse per caricare il file registrato dal passaggio precedente.",
+ "Record Screen": "Registra schermo",
+ "Record": "Registrazione",
+ "Stop Recording": "Interrompi registrazione",
+ "Introduce the model .pth path": "Inserisci il percorso .pth del modello",
+ "See Model Information": "Controlla le informazioni sul modello",
+ "## Download Model": "## Scarica il modello",
+ "Model Link": "Link al modello",
+ "Introduce the model link": "Inserisci il collegamento al modello",
+ "Download Model": "Scarica il modello",
+ "## Drop files": "## Trascina i file",
+ "Drag your .pth file and .index file into this space. Drag one and then the other.": "Trascina il file .pth e il file .index in questo spazio. Trascina uno e poi l'altro.",
+ "TTS Voices": "Voci TTS",
+ "Select the TTS voice to use for the conversion.": "Selezionare la voce TTS da utilizzare per la conversione.",
+ "Text to Synthesize": "Testo da sintetizzare",
+ "Enter the text to synthesize.": "Immettere il testo da sintetizzare.",
+ "Or you can upload a .txt file": "In alternativa, è possibile caricare un file .txt",
+ "Enter text to synthesize": "Immettere il testo da sintetizzare",
+ "Output Path for TTS Audio": "Percorso di uscita per l'audio TTS",
+ "Output Path for RVC Audio": "Percorso di uscita per l'audio RVC",
+ "Enable Applio integration with Discord presence": "Abilita l'integrazione di Applio con la presenza di Discord",
+ "It will activate the possibility of displaying the current Applio activity in Discord.": "Attiverà la possibilità di visualizzare l'attuale attività di Applio in Discord.",
+ "Enable Applio integration with applio.org/models using flask": "Abilita l'integrazione di Applio con applio.org/models utilizzando il pallone",
+ "It will activate the possibility of downloading models with a click from the website.": "Attiverà la possibilità di scaricare i modelli con un click dal sito web.",
+ "Theme": "Tema",
+ "Select the theme you want to use. (Requires restarting Applio)": "Selezionare il tema che si desidera utilizzare. (Richiede il riavvio di Applio)",
+ "Language": "Lingua",
+ "Select the language you want to use. (Requires restarting Applio)": "Selezionare la lingua che si desidera utilizzare. (Richiede il riavvio di Applio)",
+ "Plugin Installer": "Programma di installazione del plug-in",
+ "Drag your plugin.zip to install it": "Trascina il plugin.zip per installarlo",
+ "Version Checker": "Controllo della versione",
+ "Check which version of Applio is the latest to see if you need to update.": "Controlla quale versione di Applio è l'ultima per vedere se è necessario eseguire l'aggiornamento.",
+ "Check for updates": "Controlla gli aggiornamenti"
+}
diff --git a/assets/i18n/languages/ja_JA.json b/assets/i18n/languages/ja_JA.json
new file mode 100644
index 0000000000000000000000000000000000000000..289644afa81e89a6c66bc287e211d4cebe6728e6
--- /dev/null
+++ b/assets/i18n/languages/ja_JA.json
@@ -0,0 +1,175 @@
+{
+ "VITS-based Voice Conversion focused on simplicity, quality and performance.": "他の追随を許さないパワー、モジュール性、そしてユーザーフレンドリーな操作性を実現するために綿密に最適化された究極のボイスクローニングツール。",
+ "This section contains some extra utilities that often may be in experimental phases.": "このセクションには、多くの場合、実験段階にある可能性のあるいくつかの追加のユーティリティが含まれています。",
+ "Output Information": "出力情報",
+ "The output information will be displayed here.": "出力情報がここに表示されます。",
+ "Inference": "推論",
+ "Train": "学習",
+ "Extra": "おまけ",
+ "Merge Audios": "オーディオをマージ (Merge Audios)",
+ "Processing": "加工",
+ "Audio Analyzer": "オーディオアナライザ",
+ "Model Information": "モデル情報",
+ "Plugins": "プラグイン",
+ "Download": "ダウンロード",
+ "Report a Bug": "バグを報告する",
+ "Settings": "設定",
+ "Preprocess": "前処理",
+ "Model Name": "モデル名",
+ "Name of the new model.": "新しいモデルの名前。",
+ "Enter model name": "モデル名を入力",
+ "Dataset Path": "データセット パス",
+ "Path to the dataset folder.": "データセット フォルダーへのパス。",
+ "Refresh Datasets": "データセットの更新",
+ "Dataset Creator": "データセットクリエーター",
+ "Dataset Name": "データセット名",
+ "Name of the new dataset.": "新しいデータセットの名前。",
+ "Enter dataset name": "データセット名を入力",
+ "Upload Audio Dataset": "オーディオデータセットのアップロード",
+ "The audio file has been successfully added to the dataset. Please click the preprocess button.": "オーディオ ファイルがデータセットに正常に追加されました。前処理ボタンをクリックしてください。",
+ "Enter dataset path": "データセットのパスを入力",
+ "Sampling Rate": "サンプリングレート",
+ "The sampling rate of the audio files.": "オーディオファイルのサンプリングレートです。",
+ "Model Architecture": "RVC バージョン",
+ "Version of the model architecture.": "モデルの RVC バージョン。",
+ "Preprocess Dataset": "データセットの前処理",
+ "Extract": "抽出",
+ "Hop Length": "ホップ長(Hop Length)",
+ "Denotes the duration it takes for the system to transition to a significant pitch change. Smaller hop lengths require more time for inference but tend to yield higher pitch accuracy.": "システムがピッチを大きく変化させるまでの時間を示します。ホップ長が短いほど、推論に時間がかかりますが、ピッチの精度は高くなる傾向があります。",
+ "Batch Size": "バッチサイズ",
+ "It's advisable to align it with the available VRAM of your GPU. A setting of 4 offers improved accuracy but slower processing, while 8 provides faster and standard results.": "GPUの使用可能なVRAMに合わせることをお勧めします。4 に設定すると精度は向上しますが処理は遅くなり、8 に設定すると、より高速で標準的な結果が得られます。",
+ "Save Every Epoch": "すべてのエポックを保存",
+ "Determine at how many epochs the model will saved at.": "モデルが何エポックごとに保存されるかを指定します。",
+ "Total Epoch": "総エポック",
+ "Specifies the overall quantity of epochs for the model training process.": "総エポック数を指定します。",
+ "Pretrained": "事前学習済みモデル",
+ "Save Only Latest": "最新のみ保存",
+ "Enabling this setting will result in the G and D files saving only their most recent versions, effectively conserving storage space.": "この設定を有効にすると、GとDファイルの最新バージョンのみを保存し、ストレージ容量を効果的に節約します。",
+ "Save Every Weights": "すべてのウェイトを保存(Save Every Weights)",
+ "This setting enables you to save the weights of the model at the conclusion of each epoch.": "この設定により、各エポックの終了時にモデルの重みを保存できます。",
+ "Custom Pretrained": "カスタム事前学習済みモデル",
+ "Utilizing custom pretrained models can lead to superior results, as selecting the most suitable pretrained models tailored to the specific use case can significantly enhance performance.": "カスタム事前学習済みモデルを利用することで、優れた結果を得ることができます。特定のユースケースに合わせて最も適した事前学習済みモデルを選択することで、パフォーマンスが大幅に向上する可能性があります。",
+ "Upload Pretrained Model": "事前学習済みモデルのアップロード",
+ "Refresh Custom Pretraineds": "カスタム事前学習済みモデルの更新",
+ "Pretrained Custom Settings": "事前学習済みモデルのカスタム設定",
+ "The file you dropped is not a valid pretrained file. Please try again.": "ドロップしたファイルは有効な事前学習済みモデルファイルではありません。もう一度やり直してください。",
+ "Click the refresh button to see the pretrained file in the dropdown menu.": "更新ボタンをクリックすると、ドロップダウンメニューに事前学習済みモデルが表示されます。",
+ "Pretrained G Path": "カスタム事前学習済みモデル G",
+ "Pretrained D Path": "カスタム事前学習済みモデル D",
+ "GPU Settings": "GPU 設定",
+ "Sets advanced GPU settings, recommended for users with better GPU architecture.": "GPUアーキテクチャが優れているユーザーに推奨される、高度なGPU設定を設定します。",
+ "GPU Custom Settings": "GPU カスタム設定",
+ "GPU Number": "GPU番号",
+ "0 to ∞ separated by -": "範囲は0 から ∞ で、-で区切られます。",
+ "GPU Information": "GPU 情報",
+ "Pitch Guidance": "ピッチガイダンス",
+ "By employing pitch guidance, it becomes feasible to mirror the intonation of the original voice, including its pitch. This feature is particularly valuable for singing and other scenarios where preserving the original melody or pitch pattern is essential.": "ピッチガイダンスを採用することで、ピッチを含め、元の声のイントネーションを反映させることが可能になります。この機能は、歌唱など、オリジナルのメロディーやピッチパターンを保持することが不可欠な場面で特に有用です。",
+ "Utilize pretrained models when training your own. This approach reduces training duration and enhances overall quality.": "独自のトレーニングを行う場合は、事前学習済みモデルを利用します。このアプローチにより、学習時間が短縮され、全体的な品質が向上します。",
+ "Extract Features": "特徴量の抽出",
+ "Start Training": "トレーニングを開始",
+ "Generate Index": "インデックスの生成",
+ "Voice Model": "音声モデル",
+ "Select the voice model to use for the conversion.": "変換に使用する音声モデルを選択します。",
+ "Index File": "インデックス ファイル",
+ "Select the index file to use for the conversion.": "変換に使用するインデックスファイルを選択します。",
+ "Refresh": "リフレッシュ",
+ "Unload Voice": "モデルのアンロード",
+ "Single": "シングル",
+ "Upload Audio": "オーディオのアップロード",
+ "Select Audio": "オーディオを選択",
+ "Select the audio to convert.": "変換するオーディオを選択します。",
+ "Advanced Settings": "詳細設定",
+ "Clear Outputs (Deletes all audios in assets/audios)": "Clear Outputs (アセット/オーディオ内のすべてのオーディオを削除します)",
+ "Custom Output Path": "カスタム出力パス",
+ "Output Path": "出力パス",
+ "The path where the output audio will be saved, by default in assets/audios/output.wav": "出力オーディオが保存されるパス (デフォルトでは assets/audios/output.wav",
+ "Split Audio": "オーディオの分割",
+ "Split the audio into chunks for inference to obtain better results in some cases.": "音声をチャンクに分割して推論すると、より良い結果が得られる場合があります。",
+ "Autotune": "オートチューン",
+ "Apply a soft autotune to your inferences, recommended for singing conversions.": "推論にソフトオートチューンを適用します。歌の変換に推奨されます。",
+ "Clean Audio": "クリーンオーディオ",
+ "Clean your audio output using noise detection algorithms, recommended for speaking audios.": "音声を話すのに推奨されるノイズ検出アルゴリズムを使用して、音声出力をクリーンアップします。",
+ "Clean Strength": "クリーンな強さ",
+ "Set the clean-up level to the audio you want, the more you increase it the more it will clean up, but it is possible that the audio will be more compressed.": "クリーンアップレベルを必要なオーディオに設定すると、上げれば上げるほどクリーンアップされますが、オーディオがより圧縮される可能性があります。",
+ "Pitch": "ピッチ",
+ "Set the pitch of the audio, the higher the value, the higher the pitch.": "オーディオのピッチを設定し、値が大きいほどピッチが高くなります。",
+ "Filter Radius": "フィルタ半径(Filter Radius)",
+ "If the number is greater than or equal to three, employing median filtering on the collected tone results has the potential to decrease respiration.": "数値が 3 以上の場合、収集されたトーン結果に中央値フィルタリングを使用すると、呼吸が減少する可能性があります。",
+ "Search Feature Ratio": "特徴量検索比率",
+ "Influence exerted by the index file; a higher value corresponds to greater influence. However, opting for lower values can help mitigate artifacts present in the audio.": "インデックスファイルによって及ぼされる影響。値が大きいほど、インデックスの影響が大きくなります。ただし、低い値を選択すると、オーディオに存在するアーティファクトを軽減できます。",
+ "Volume Envelope": "ボリュームエンベロープ",
+ "Substitute or blend with the volume envelope of the output. The closer the ratio is to 1, the more the output envelope is employed.": "出力のボリュームエンベロープで代用またはブレンドします。比率が 1 に近づくほど、出力エンベロープが採用されます。",
+ "Protect Voiceless Consonants": "無声子音の保護",
+ "Safeguard distinct consonants and breathing sounds to prevent electro-acoustic tearing and other artifacts. Pulling the parameter to its maximum value of 0.5 offers comprehensive protection. However, reducing this value might decrease the extent of protection while potentially mitigating the indexing effect.": "明瞭な子音と呼吸音を保護し、電気音響の引き裂きやその他のアーチファクトを防ぎます。パラメータを最大値の 0.5 までプルすると、包括的な保護が提供されます。ただし、この値を小さくすると、インデックス作成の影響が軽減される可能性がある一方で、保護の範囲が狭くなる可能性があります。",
+ "Pitch extraction algorithm": "ピッチ抽出アルゴリズム",
+ "Pitch extraction algorithm to use for the audio conversion. The default algorithm is rmvpe, which is recommended for most cases.": "オーディオ変換に使用するピッチ抽出アルゴリズム。デフォルトのアルゴリズムは rmvpe で、ほとんどの場合に推奨されます。",
+ "Convert": "コンバート",
+ "Export Audio": "オーディオのエクスポート",
+ "Batch": "バッチ",
+ "Input Folder": "入力フォルダ",
+ "Select the folder containing the audios to convert.": "変換するオーディオを含むフォルダを選択します。",
+ "Enter input path": "入力パスを入力",
+ "Output Folder": "出力フォルダ",
+ "Select the folder where the output audios will be saved.": "出力オーディオを保存するフォルダを選択します。",
+ "Enter output path": "出力パスを入力",
+ "Get information about the audio": "オーディオに関する情報を取得する",
+ "Information about the audio file": "オーディオファイルに関する情報",
+ "Waiting for information...": "情報を待っています...",
+ "## Voice Blender": "## モデルマージ",
+ "Select two voice models, set your desired blend percentage, and blend them into an entirely new voice.": "2つの音声モデルを選択し、希望のブレンド率を設定して、まったく新しい音声モデルにマージします。",
+ "Voice Blender": "モデルマージ",
+ "Drag and drop your model here": "ここにモデルをドラッグ&ドロップします",
+ "You can also use a custom path.": "カスタムパスを使用することもできます。",
+ "Blend Ratio": "ブレンド比(Blend Ratio)",
+ "Adjusting the position more towards one side or the other will make the model more similar to the first or second.": "位置をどちらか一方に調整すると、モデルが最初または2番目に近づきます。",
+ "Fusion": "マージ",
+ "Path to Model": "モデルへのパス",
+ "Enter path to model": "モデルへのパスを入力",
+ "Model information to be placed": "配置するモデル情報",
+ "Inroduce the model information": "モデル情報の取得",
+ "The information to be placed in the model (You can leave it blank or put anything).": "モデルに配置する情報(空白のままでも、何でも入力できます)。",
+ "View model information": "モデル情報の表示",
+ "Introduce the model pth path": ".pthのパスを指定してください。",
+ "View": "表示",
+ "Model extraction": "モデルの抽出",
+ "Model conversion": "モデル変換",
+ "Pth file": "Pth ファイル",
+ "Output of the pth file": "p番目のファイルの出力",
+ "# How to Report an Issue on GitHub": "# GitHub で問題を報告する方法",
+ "1. Click on the 'Record Screen' button below to start recording the issue you are experiencing.": "1.下の[画面の記録]ボタンをクリックして、発生している問題の記録を開始します。",
+ "2. Once you have finished recording the issue, click on the 'Stop Recording' button (the same button, but the label changes depending on whether you are actively recording or not).": "2.問題の記録が終了したら、[記録の停止]ボタンをクリックします(同じボタンですが、アクティブに記録しているかどうかによってラベルが変わります)。",
+ "3. Go to [GitHub Issues](https://github.com/IAHispano/Applio/issues) and click on the 'New Issue' button.": "3. [GitHub Issues](https://github.com/IAHispano/Applio/issues)に移動し、[New Issue]ボタンをクリックします。",
+ "4. Complete the provided issue template, ensuring to include details as needed, and utilize the assets section to upload the recorded file from the previous step.": "4. 提供された課題テンプレートに記入し、必要に応じて詳細を含め、アセット セクションを使用して前の手順で記録したファイルをアップロードします。",
+ "Record Screen": "録画画面",
+ "Record": "記録",
+ "Stop Recording": "記録の停止",
+ "Introduce the model .pth path": "モデルの .pth パスを導入する",
+ "See Model Information": "「モデル情報」を参照",
+ "## Download Model": "## モデルのダウンロード",
+ "Model Link": "モデルリンク",
+ "Introduce the model link": "モデルリンクの紹介",
+ "Download Model": "モデルのダウンロード",
+ "## Drop files": "## ファイルのドロップ",
+ "Drag your .pth file and .index file into this space. Drag one and then the other.": ".pth ファイルと .index ファイルをこのスペースにドラッグします。一方をドラッグしてから、もう一方をドラッグします。",
+ "TTS Voices": "TTS ボイス",
+ "Select the TTS voice to use for the conversion.": "変換に使用する TTS 音声を選択します。",
+ "Text to Synthesize": "合成するテキスト",
+ "Enter the text to synthesize.": "合成するテキストを入力します。",
+ "Or you can upload a .txt file": "または、.txtファイルをアップロードすることもできます",
+ "Enter text to synthesize": "合成するテキストを入力する",
+ "Output Path for TTS Audio": "TTSオーディオの出力パス",
+ "Output Path for RVC Audio": "RVCオーディオの出力パス",
+ "Enable Applio integration with Discord presence": "Applio と Discord のプレゼンスの統合を有効にする",
+ "It will activate the possibility of displaying the current Applio activity in Discord.": "これにより、Discordで現在のApplioアクティビティを表示する可能性がアクティブになります。",
+ "Enable Applio integration with applio.org/models using flask": "フラスコを使用して Applio と applio.org/models の統合を有効にする",
+ "It will activate the possibility of downloading models with a click from the website.": "ウェブサイトからクリックするだけでモデルをダウンロードする可能性がアクティブになります。",
+ "Theme": "テーマ",
+ "Select the theme you want to use. (Requires restarting Applio)": "使用するテーマを選択します。(Applioの再起動が必要)",
+ "Language": "言語",
+ "Select the language you want to use. (Requires restarting Applio)": "使用する言語を選択します。(Applioの再起動が必要)",
+ "Plugin Installer": "プラグインインストーラ",
+ "Drag your plugin.zip to install it": "plugin.zipをドラッグしてインストールします",
+ "Version Checker": "バージョンチェッカー",
+ "Check which version of Applio is the latest to see if you need to update.": "更新が必要かどうか、Applioのどのバージョンが最新かを確認してください。",
+ "Check for updates": "アップデートを確認する"
+}
diff --git a/assets/i18n/languages/jv_JV.json b/assets/i18n/languages/jv_JV.json
new file mode 100644
index 0000000000000000000000000000000000000000..9e26dfeeb6e641a33dae4961196235bdb965b21b
--- /dev/null
+++ b/assets/i18n/languages/jv_JV.json
@@ -0,0 +1 @@
+{}
\ No newline at end of file
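These catalogs use the English source string itself as the lookup key, which is what lets an empty catalog such as `jv_JV.json` above still work: a locale with no entries simply falls back to the key. The sketch below illustrates that lookup-with-fallback pattern; the directory layout matches this diff, but the loader is a hypothetical illustration, not Applio's actual `I18nAuto` implementation.

```python
# Minimal sketch of key-based i18n lookup with English fallback.
# Hypothetical illustration only -- not Applio's actual I18nAuto code.
import json
import os


def load_catalog(lang: str, base: str = "assets/i18n/languages") -> dict:
    """Return the key->translation mapping for a locale; {} if absent."""
    path = os.path.join(base, f"{lang}.json")
    if not os.path.isfile(path):
        return {}
    with open(path, encoding="utf-8") as f:
        return json.load(f)


catalog = load_catalog("jv_JV")  # empty file -> empty mapping


def _(key: str) -> str:
    # A missing entry falls back to the English source string (the key).
    return catalog.get(key, key)


print(_("Inference"))  # -> "Inference" until a Javanese entry exists
```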
diff --git a/assets/i18n/languages/ko_KO.json b/assets/i18n/languages/ko_KO.json
new file mode 100644
index 0000000000000000000000000000000000000000..b35a6f731c48fb21eea874ffbdd62974d2c0d629
--- /dev/null
+++ b/assets/i18n/languages/ko_KO.json
@@ -0,0 +1,175 @@
+{
+ "VITS-based Voice Conversion focused on simplicity, quality and performance.": "타의 추종을 불허하는 성능, 모듈성 및 사용자 친화적인 경험을 위해 세심하게 최적화된 궁극의 음성 복제 도구입니다.",
+ "This section contains some extra utilities that often may be in experimental phases.": "이 섹션에는 아직 실험 단계에 있는 추가 유틸리티가 포함되어 있습니다.",
+ "Output Information": "출력 정보",
+ "The output information will be displayed here.": "출력 정보가 여기에 표시됩니다.",
+ "Inference": "추론",
+ "Train": "모델 학습",
+ "Extra": "기타 도구",
+ "Merge Audios": "오디오 병합",
+ "Processing": "처리",
+ "Audio Analyzer": "오디오 분석기",
+ "Model Information": "모델 정보",
+ "Plugins": "플러그인",
+ "Download": "다운로드",
+ "Report a Bug": "버그 신고",
+ "Settings": "설정",
+ "Preprocess": "사전 처리",
+ "Model Name": "모델명",
+ "Name of the new model.": "새 모델의 이름입니다.",
+ "Enter model name": "모델명 입력",
+ "Dataset Path": "데이터 세트 경로",
+ "Path to the dataset folder.": "데이터 세트 폴더의 경로입니다.",
+ "Refresh Datasets": "데이터 세트 새로 고침",
+ "Dataset Creator": "데이터셋 생성기",
+ "Dataset Name": "데이터 세트 이름",
+ "Name of the new dataset.": "새 데이터 세트의 이름입니다.",
+ "Enter dataset name": "데이터 세트 이름 입력",
+ "Upload Audio Dataset": "오디오 데이터 세트 업로드",
+ "The audio file has been successfully added to the dataset. Please click the preprocess button.": "오디오 파일이 데이터 세트에 성공적으로 추가되었습니다. 전처리 버튼을 클릭해주세요.",
+ "Enter dataset path": "데이터 세트 경로 입력",
+ "Sampling Rate": "샘플링 레이트",
+ "The sampling rate of the audio files.": "오디오 파일의 샘플링 레이트입니다.",
+ "Model Architecture": "RVC 버전",
+ "Version of the model architecture.": "모델의 RVC 버전입니다.",
+ "Preprocess Dataset": "데이터 세트 사전 처리",
+ "Extract": "추출물",
+ "Hop Length": "홉 길이",
+ "Denotes the duration it takes for the system to transition to a significant pitch change. Smaller hop lengths require more time for inference but tend to yield higher pitch accuracy.": "시스템이 중요한 피치 변화로 전환되는 데 걸리는 시간을 나타냅니다. 홉 길이가 작을수록 추론에 더 많은 시간이 필요하지만 피치 정확도가 높아지는 경향이 있습니다.",
+ "Batch Size": "배치 크기(Batch Size)",
+ "It's advisable to align it with the available VRAM of your GPU. A setting of 4 offers improved accuracy but slower processing, while 8 provides faster and standard results.": "GPU의 사용 가능한 VRAM에 맞추는 것이 좋습니다. 4로 설정하면 정확도가 향상되지만 처리 속도가 느려지고, 8로 설정하면 더 빠르고 표준적인 결과를 얻을 수 있습니다.",
+ "Save Every Epoch": "모든 Epoch를 저장합니다",
+ "Determine at how many epochs the model will saved at.": "모델이 몇 epoch에 저장될지 결정합니다.",
+ "Total Epoch": "Total Epoch(총 에포크)",
+ "Specifies the overall quantity of epochs for the model training process.": "모델 학습 프로세스에 대한 전체 epoch 수를 지정합니다.",
+ "Pretrained": "사전학습 모델 사용",
+ "Save Only Latest": "마지막 파일만 저장(Save Only Latest)",
+ "Enabling this setting will result in the G and D files saving only their most recent versions, effectively conserving storage space.": "이 설정을 활성화하면 마지막으로 저장된 G 및 D 파일만 남게 되므로 저장 공간을 효과적으로 절약할 수 있습니다.",
+ "Save Every Weights": "모든 가중치 저장",
+ "This setting enables you to save the weights of the model at the conclusion of each epoch.": "이 설정을 사용하면 각 Epoch가 끝날 때 모델의 가중치를 저장할 수 있습니다.",
+ "Custom Pretrained": "커스텀 사전학습 모델 사용",
+ "Utilizing custom pretrained models can lead to superior results, as selecting the most suitable pretrained models tailored to the specific use case can significantly enhance performance.": "특정 사용 사례에 맞는 가장 적합한 커스텀 사전학습 모델을 선택하면 성능이 크게 향상될 수 있고 우수한 결과를 얻을 수 있습니다.",
+ "Upload Pretrained Model": "사전학습 된 모델 업로드",
+ "Refresh Custom Pretraineds": "커스텀 사전학습 모델 새로 고침",
+ "Pretrained Custom Settings": "커스텀 사전학습 모델 설정",
+ "The file you dropped is not a valid pretrained file. Please try again.": "드롭한 파일이 유효한 사전학습 모델 파일이 아닙니다. 다시 시도하십시오.",
+ "Click the refresh button to see the pretrained file in the dropdown menu.": "새로 고침 버튼을 클릭하면 드롭다운 메뉴에서 사전 학습 된 파일을 볼 수 있습니다.",
+ "Pretrained G Path": "커스텀 사전학습 모델 G",
+ "Pretrained D Path": "커스텀 사전학습 모델 D",
+ "GPU Settings": "GPU 설정",
+ "Sets advanced GPU settings, recommended for users with better GPU architecture.": "고급 GPU 아키텍처를 사용하는 사용자에게 권장되는 GPU 설정.",
+ "GPU Custom Settings": "GPU 커스텀 설정",
+ "GPU Number": "GPU 번호",
+ "0 to ∞ separated by -": "0 - ∞ - 로 구분",
+ "GPU Information": "GPU 정보",
+ "Pitch Guidance": "음정(Pitch) 안내",
+ "By employing pitch guidance, it becomes feasible to mirror the intonation of the original voice, including its pitch. This feature is particularly valuable for singing and other scenarios where preserving the original melody or pitch pattern is essential.": "음정(Pitch) 가이던스를 사용하면 음정을 포함하여 원래 목소리의 억양을 미러링할 수 있습니다. 이 기능은 노래 및 원래 멜로디 또는 음정 패턴의 보존이 필수적인 것에 특히 유용합니다.",
+ "Utilize pretrained models when training your own. This approach reduces training duration and enhances overall quality.": "자체 모델을 학습할 때 사전 학습된 모델을 활용합니다. 이 접근 방식은 학습 시간을 줄이고 전반적인 품질을 향상시킵니다.",
+ "Extract Features": "특징 추출",
+ "Start Training": "트레이닝 시작하기",
+ "Generate Index": "인덱스 생성",
+ "Voice Model": "음성 모델",
+ "Select the voice model to use for the conversion.": "변환에 사용할 음성 모델을 선택합니다.",
+ "Index File": "색인 파일",
+ "Select the index file to use for the conversion.": "변환에 사용할 인덱스 파일을 선택합니다.",
+ "Refresh": "새로고침",
+ "Unload Voice": "음성 언로드",
+ "Single": "싱글",
+ "Upload Audio": "오디오 업로드",
+ "Select Audio": "오디오 선택",
+ "Select the audio to convert.": "변환할 오디오를 선택합니다.",
+ "Advanced Settings": "고급 설정",
+ "Clear Outputs (Deletes all audios in assets/audios)": "출력 지우기(에셋/오디오의 모든 오디오 삭제)",
+ "Custom Output Path": "사용자 지정 출력 경로",
+ "Output Path": "출력 경로",
+ "The path where the output audio will be saved, by default in assets/audios/output.wav": "출력 오디오가 저장될 경로 입니다, 기본값 assets/audios/output.wav",
+ "Split Audio": "오디오 분할",
+ "Split the audio into chunks for inference to obtain better results in some cases.": "경우에 따라 더 나은 결과를 얻기 위해 오디오를 청크로 분할합니다.",
+ "Autotune": "오토튠",
+ "Apply a soft autotune to your inferences, recommended for singing conversions.": "추론 시 소프트 오토튠을 적용합니다. 노래에 권장합니다",
+ "Clean Audio": "디-노이즈 오디오",
+ "Clean your audio output using noise detection algorithms, recommended for speaking audios.": "노이즈 감지 알고리즘을 사용하여 출력되는 오디오의 불필요한 정보를 정리합니다. 연설과 같은 스피치 오디오에 적합합니다.",
+ "Clean Strength": "감지 강도",
+ "Set the clean-up level to the audio you want, the more you increase it the more it will clean up, but it is possible that the audio will be more compressed.": "감지 레벨을 높이면 높일수록 정리가 더 많이 되지만 오디오가 더 압축되어 음질 저하가 생길 수 있습니다.",
+ "Pitch": "음정",
+ "Set the pitch of the audio, the higher the value, the higher the pitch.": "오디오의 음정(Pitch)을 설정합니다. 값이 높을수록 음정이 높아집니다.",
+ "Filter Radius": "필터 반경",
+ "If the number is greater than or equal to three, employing median filtering on the collected tone results has the potential to decrease respiration.": "숫자가 3보다 크거나 같을 때 수집된 톤 결과에 중앙값 필터링을 사용하여 호흡등의 소리를 감소 시킬 수 있습니다",
+ "Search Feature Ratio": "특성 검색 비율(Search Feature Ratio)",
+ "Influence exerted by the index file; a higher value corresponds to greater influence. However, opting for lower values can help mitigate artifacts present in the audio.": "특성 인덱스 파일이 미치는 영향; 값이 높을수록 모델의 특성을 잘 나타내지만 값이 낮을수록 인공적인 느낌을 줄어들 수도 있습니다.",
+ "Volume Envelope": "볼륨 엔벨로프(Volume Envelope)",
+ "Substitute or blend with the volume envelope of the output. The closer the ratio is to 1, the more the output envelope is employed.": "출력의 볼륨 포락선(Envelope)로 대체하거나 혼합합니다. 비율이 1에 가까울수록 출력 엔벨로프가 더 많이 사용됩니다.",
+ "Protect Voiceless Consonants": "무성 자음 보호",
+ "Safeguard distinct consonants and breathing sounds to prevent electro-acoustic tearing and other artifacts. Pulling the parameter to its maximum value of 0.5 offers comprehensive protection. However, reducing this value might decrease the extent of protection while potentially mitigating the indexing effect.": "뚜렷한 자음과 숨소리를 보호하여 전자 음향 찢어짐 및 기타 아티팩트를 방지합니다. 설정값을 최대치인 0.5로 끌어오면 포괄적인 보호가 제공됩니다. 반대로 보호 범위를 낮출 경우 인덱싱 효과를 완화할 수 있습니다",
+ "Pitch extraction algorithm": "음정(Pitch) 추출 알고리즘",
+ "Pitch extraction algorithm to use for the audio conversion. The default algorithm is rmvpe, which is recommended for most cases.": "오디오 변환에 사용할 음정 추출 알고리즘입니다. 디폴트 알고리즘은 rmvpe이며, 대부분의 경우에 권장됩니다.",
+ "Convert": "변환",
+ "Export Audio": "오디오 내보내기",
+ "Batch": "일괄",
+ "Input Folder": "입력 폴더",
+ "Select the folder containing the audios to convert.": "변환할 오디오가 포함된 폴더를 선택합니다.",
+ "Enter input path": "입력 경로 입력",
+ "Output Folder": "출력 폴더",
+ "Select the folder where the output audios will be saved.": "출력 오디오를 저장할 폴더를 선택합니다.",
+ "Enter output path": "출력 경로 입력",
+ "Get information about the audio": "오디오에 대한 정보 가져오기",
+ "Information about the audio file": "오디오 파일에 대한 정보",
+ "Waiting for information...": "정보를 기다리는 중...",
+ "## Voice Blender": "## 목소리 혼합",
+ "Select two voice models, set your desired blend percentage, and blend them into an entirely new voice.": "두 개의 음성 모델을 선택하고 원하는 혼합 비율을 설정하면 완전히 새로운 음성으로 혼합됩니다.",
+ "Voice Blender": "목소리 혼합",
+ "Drag and drop your model here": "여기에 모델을 끌어다 놓습니다.",
+ "You can also use a custom path.": "사용자 지정 경로를 사용할 수도 있습니다.",
+ "Blend Ratio": "합성 비율(Blend Ratio)",
+ "Adjusting the position more towards one side or the other will make the model more similar to the first or second.": "위치를 양쪽 방향으로 조정하며 두 모델의 혼합 비중을 선택할 수 있습니다.",
+ "Fusion": "목소리 혼합",
+ "Path to Model": "모델 경로",
+ "Enter path to model": "모델 경로 입력",
+ "Model information to be placed": "배치할 모델 정보",
+ "Inroduce the model information": "모델 정보 도입",
+ "The information to be placed in the model (You can leave it blank or put anything).": "모델의 정보(비워 두거나 아무 내용을 써도 됩니다).",
+ "View model information": "모델 정보 보기",
+ "Introduce the model pth path": "모델 pth 경로 소개",
+ "View": "보기",
+ "Model extraction": "모델 추출",
+ "Model conversion": "모델 변환",
+ "Pth file": "Pth 파일",
+ "Output of the pth file": "pth 파일의 출력",
+ "# How to Report an Issue on GitHub": "# GitHub에서 문제를 보고하는 방법",
+ "1. Click on the 'Record Screen' button below to start recording the issue you are experiencing.": "1. 아래의 '녹화 화면' 버튼을 클릭하여 발생한 문제의 녹화를 시작합니다.",
+ "2. Once you have finished recording the issue, click on the 'Stop Recording' button (the same button, but the label changes depending on whether you are actively recording or not).": "2. 문제 기록이 끝나면 '기록 중지' 버튼(동일한 버튼이지만 현재 기록 중인지 여부에 따라 레이블이 변경됨)을 클릭합니다.",
+ "3. Go to [GitHub Issues](https://github.com/IAHispano/Applio/issues) and click on the 'New Issue' button.": "3. [GitHub 문제](https://github.com/IAHispano/Applio/issues)로 이동하여 '새 문제' 버튼을 클릭합니다.",
+ "4. Complete the provided issue template, ensuring to include details as needed, and utilize the assets section to upload the recorded file from the previous step.": "4. 제공된 문제 템플릿을 완료하고 필요에 따라 세부 정보를 포함하고 자산 섹션을 활용하여 이전 단계에서 기록된 파일을 업로드합니다.",
+ "Record Screen": "녹화 화면",
+ "Record": "기록",
+ "Stop Recording": "기록 중지",
+ "Introduce the model .pth path": "모델 .pth 경로 소개",
+ "See Model Information": "모델 정보 보기",
+ "## Download Model": "## 모델 다운로드",
+ "Model Link": "모델 링크",
+ "Introduce the model link": "모델 소개 링크",
+ "Download Model": "모델 다운로드",
+ "## Drop files": "## 파일 드롭",
+ "Drag your .pth file and .index file into this space. Drag one and then the other.": ".pth 파일과 .index 파일을 이 공간으로 드래그합니다. 하나를 드래그한 다음 다른 하나를 드래그합니다.",
+ "TTS Voices": "TTS 음성",
+ "Select the TTS voice to use for the conversion.": "변환에 사용할 TTS 음성을 선택합니다.",
+ "Text to Synthesize": "합성할 텍스트(Text to Synthesize)",
+ "Enter the text to synthesize.": "합성할 텍스트를 입력합니다.",
+ "Or you can upload a .txt file": "또는 .txt 파일을 업로드할 수 있습니다.",
+ "Enter text to synthesize": "합성할 텍스트 입력",
+ "Output Path for TTS Audio": "TTS 오디오의 출력 경로",
+ "Output Path for RVC Audio": "RVC 오디오의 출력 경로",
+ "Enable Applio integration with Discord presence": "Discord와 Applio 통합 활성화",
+ "It will activate the possibility of displaying the current Applio activity in Discord.": "Discord에서 Applio의 활동이 표시되는 기능을 활성화합니다.",
+ "Enable Applio integration with applio.org/models using flask": "플라스크를 사용하여 applio.org/models 와 Applio 통합 활성화",
+ "It will activate the possibility of downloading models with a click from the website.": "웹 사이트에서 클릭 한 번으로 모델을 다운로드 할 수 있습니다.",
+ "Theme": "테마",
+ "Select the theme you want to use. (Requires restarting Applio)": "사용할 테마를 선택합니다. (Applio를 다시 시작해야 함)",
+ "Language": "언어",
+ "Select the language you want to use. (Requires restarting Applio)": "사용할 언어를 선택합니다. (Applio를 다시 시작해야 함)",
+ "Plugin Installer": "플러그인 인스톨러",
+ "Drag your plugin.zip to install it": "plugin.zip 드래그하여 설치하십시오.",
+ "Version Checker": "버전 검사기",
+ "Check which version of Applio is the latest to see if you need to update.": "Applio의 최신 버전을 확인하여 업데이트가 필요한지 확인하십시오.",
+ "Check for updates": "업데이트 확인"
+}
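
Each of these locale files maps the verbatim English source string to its translation, so a lookup is a plain dictionary access keyed by the English text. Below is a minimal sketch of how such a file can back that lookup, with a fallback to the English string when a key is missing — an illustration only, not Applio's actual loader; the `SimpleI18n` class and its fallback behavior are assumptions:

```python
import json
import os

class SimpleI18n:
    """Illustrative loader for a locale file such as
    assets/i18n/languages/ko_KO.json (path taken from this diff)."""

    def __init__(self, language: str, base_dir: str = "assets/i18n/languages"):
        path = os.path.join(base_dir, f"{language}.json")
        try:
            with open(path, encoding="utf-8") as f:
                self.translations = json.load(f)
        except FileNotFoundError:
            # Unknown locale: every lookup falls back to the English source.
            self.translations = {}

    def __call__(self, text: str) -> str:
        # The English source string is the key; missing keys fall back to it.
        return self.translations.get(text, text)

i18n = SimpleI18n("ko_KO")
print(i18n("Output Information"))   # -> "출력 정보"
print(i18n("A brand-new string"))   # -> falls back to the English text
```
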
diff --git a/assets/i18n/languages/ml_IN.json b/assets/i18n/languages/ml_IN.json
new file mode 100644
index 0000000000000000000000000000000000000000..5ebda51e75828954d7ba9e310f149b5cee5b4309
--- /dev/null
+++ b/assets/i18n/languages/ml_IN.json
@@ -0,0 +1,204 @@
+{
+ "VITS-based Voice Conversion focused on simplicity, quality and performance.": "അനന്തമായ ശക്തി, മോഡുലാരിറ്റി, ഉപയോക്തൃ-സൗഹൃദ അനുഭവത്തിനായി യാഥാർത്ഥ്യമാക്കിയ അത്യുന്നതമായ വോയ്സ് ക്ലോണിങ് ടൂൾ.\n[മലയാളത്തിലേക്ക് വായന: Enes](https://discord.com/users/1140031358006202468)",
+ "This section contains some extra utilities that often may be in experimental phases.": "ഈ പേരിലെ കൊണ്ടാടാൻ പ്രയോജനപ്രദമായ എന്നതിനാൽ കഴിഞ്ഞത് സാധാരണയായ പോസിക്കുകളിൽ അവസാനിക്കാത്ത ചില യന്ത്രങ്ങൾ ഉള്ളതാണ്.",
+ "Output Information": "പ്രണാമം വിവരം",
+ "The output information will be displayed here.": "ഇവിടെ പ്രണയ വിവരങ്ങൾ പ്രദശിപ്പിക്കപ്പെടും.",
+ "Inference": "സൂചന",
+ "Train": "പ്രശിക്ഷണം",
+ "Extra": "അധികം",
+ "Merge Audios": "ഓഡിയോ ഒടിക്കുക",
+ "Processing": "പ്രൊസസ്സിംഗ്",
+ "Audio Analyzer": "ഓഡിയോ വിശ്ലേഷണകൾ",
+ "Model Information": "മോഡൽ വിവരങ്ങൾ",
+ "Plugins": "പ്ലഗിൻസ്",
+ "Download": "ഡൗൺലോഡ്",
+ "Report a Bug": "പിശക് റിപ്പോർട്ട്",
+ "Settings": "സെറ്റിംഗുകൾ",
+ "Preprocess": "പ്രൊസസ്",
+ "Model Name": "മോഡൽ പേര്",
+ "Name of the new model.": "പുതിയ മോഡലിന്റെ പേര്.",
+ "Enter model name": "മോഡൽ പേര് നൽകുക",
+ "Dataset Path": "ഡാറ്റാസെറ്റ് പാത",
+ "Path to the dataset folder.": "ഡാറ്റാസെറ്റ് ഫോൾഡർക്കുള്ള പാത.",
+ "Refresh Datasets": "ഡാറ്റാസെറ്റുകൾ പുനഃസൃഷ്ടിക്കുക",
+ "Dataset Creator": "ഡാറ്റാസെറ്റ് സൃഷ്ടാവ്",
+ "Dataset Name": "ഡാറ്റാസെറ്റ് പേര്",
+ "Name of the new dataset.": "പുതിയ ഡാറ്റാസെറ്റിന്റെ പേര്.",
+ "Enter dataset name": "ഡാറ്റാസെറ്റ് പേര് നൽകുക",
+ "Upload Audio Dataset": "ഓഡിയോ ഡാറ്റാസെറ്റ് അപ്ലോഡ് ചെയ്യുക",
+ "The audio file has been successfully added to the dataset. Please click the preprocess button.": "ഓഡിയോ ഫയൽ യഥാർത്ഥമായി ഡാറ്റാസെറ്റിലേക്ക് ചേർന്നു. ദയവായി പ്രൊസെസ് ബട്ടൺ അമർത്തുക.",
+ "Enter dataset path": "ഡാറ്റാസെറ്റ് പാത നൽകുക",
+ "Sampling Rate": "സാമ്പ്ലിംഗ് റേറ്റ്",
+ "The sampling rate of the audio files.": "ഓഡിയോ ഫയലുകളുടെ സാമ്പ്ലിംഗ് റേറ്റ്.",
+ "Model Architecture": "RVC പതിപ്പ്",
+ "Version of the model architecture.": "മോഡലിന്റെ RVC പതിപ്പ്.",
+ "Preprocess Dataset": "ഡാറ്റാസെറ്റ് പ്രൊസെസ് ചെയ്യുക",
+
+ "Embedder Model": "Embedder Model",
+ "Model used for learning speaker embedding.": "സ്പീക്കർ എംബെഡ്ഡിംഗ് പഠിപ്പിക്കാൻ ഉപയോഗിക്കുന്ന മോഡൽ.",
+ "Extract": "എക്സ്ട്രാക്ട്",
+ "Hop Length": "ഹോപ്പ് ലെന്ത്",
+ "Denotes the duration it takes for the system to transition to a significant pitch change. Smaller hop lengths require more time for inference but tend to yield higher pitch accuracy.": "സിസ്റ്റത്തിൽ ഒരു വിശദ പിച്ച് മാറ്റത്തിന്റെ ദാരിദ്ര്യം സൂചിപ്പിക്കുന്നു. ചെറിയ ഹോപ്പ് ലെന്തുകള് ഇൻഫരൻസിനായി കൂടുതൽ സമയം ആവശ്യപ്പെടുന്നു, എന്നിരുന്നാലും ഉയരമായ പിച്ച് അക്ക്യൂറസി നൽകുന്നു.",
+ "Batch Size": "ബാച്ച് വലുപ്പം",
+ "It's advisable to align it with the available VRAM of your GPU. A setting of 4 offers improved accuracy but slower processing, while 8 provides faster and standard results.": "നിനക്ക് ലഭ്യമായ GPU-യുടെ VRAM നുസരിച്ച് അലൈൻ ചെയ്യുന്നത് പ്രാധാന്യപ്പെടുന്നു. 4-ന്റെ സെറ്റിംഗ് മൊത്തം അക്ക്യൂറസി പരിഷ്കർത്തനവും എല്ലാത്തിനെയും അവലംബപ്പെടുന്നു, എന്നിരുന്നാലും 8 വലുപ്പം അനുഭവജീവനത്തിനായി ഉന്നയിക്കുന്നു.",
+ "Save Every Epoch": "എന്നാൽ എന്റെ എപ്പൊക്കാലിലെയും മോഡൽ സേവ് ചെയ്യുന്നതിനു വരെ",
+ "Determine at how many epochs the model will be saved at.": "എന്താണ് എപ്പൊക്കാലിൽ മോഡൽ സേവ് ചെയ്യപ്പെടുന്നതെന്ന് തിരഞ്ഞെടുക്കുക.",
+ "Total Epoch": "മൊത്തം എപ്പൊക്ക്",
+ "Specifies the overall quantity of epochs for the model training process.": "മോഡൽ പ്രശിക്ഷണ പ്രക്രിയയ്ക്കായി എപ്പൊക്ക് എത്രയാണ് എന്നത് നിര്ദിഷ്ടമാക്കുന്നു.",
+ "Pretrained": "പ്രീട്രെയിനെഡ്",
+ "Save Only Latest": "കേവലായി പുതിയത് മാത്രം സേവ് ചെയ്യുക",
+ "Enabling this setting will result in the G and D files saving only their most recent versions, effectively conserving storage space.": "ഈ സെറ്റിംഗ് പ്രവർത്തനത്തിനായി ക്രമീകരിച്ചാൽ, G മറ്റും D ഫയലുകൾ അവരുടെ അവസാന പതിപ്പുകൾക്ക് മാത്രം സേവ് ചെയ്യും, പ്രഭാവകരമായി സ്റ്റോറേജ് സ്ഥലം സംരക്ഷിക്കുന്നതാണ്.",
+ "Save Every Weights": "എന്റെ എപ്പൊക്കാലിലും ഭാരം സേവ് ചെയ്യുക",
+ "This setting enables you to save the weights of the model at the conclusion of each epoch.": "ഈ സെറ്റിംഗ് നിര്ദ്ദേശപ്പെടുന്ന ഓരോ എപ്പൊക്കിലും മോഡലിന്റെ ഭാരങ്ങൾ സേവ് ചെയ്യാൻ കഴിയുന്നു.",
+ "Custom Pretrained": "കസ്റ്റം പ്രീട്രെയിനെഡ്",
+ "Utilizing custom pretrained models can lead to superior results, as selecting the most suitable pretrained models tailored to the specific use case can significantly enhance performance.": "കസ്റ്റം പ്രീട്രെയിനെഡ് മോഡലുകൾ ഉപയോഗിക്കുന്നത് ഉന്നത ഫലങ്ങൾ നൽകിയാൽ, സ്പഷ്ടമായ ഉപയോഗ കേസിനായി കൈമേലെ പ്രീട്രെയിനെഡ് മോഡലുകൾ തിരഞ്ഞെടുക്കുന്നത് സാധാരണയായ പ്രദർശനത്തെ വളർത്തുന്നതിനും പ്രദർശനം വളർത്തുന്നതിനും കൂടുതൽ സഹായകമാകും.",
+ "Upload Pretrained Model": "പ്രീട്രെയിനെഡ് മോഡൽ അപ്ലോഡ് ചെയ്യുക",
+ "Refresh Custom Pretraineds": "കസ്റ്റം പ്രീട്രെയിനെഡുകൾ പുനഃസ്വന്തമാക്കുക",
+ "Pretrained Custom Settings": "പ്രീട്രെയിനെഡ് കസ്റ്റം സെറ്റിംഗുകൾ",
+ "The file you dropped is not a valid pretrained file. Please try again.": "നിനക്ക് ഡ്രോപ്പ് ചെയ്യിയ ഫയൽ ഒരു സാധാരണ പ്രീട്രെയിനെഡ് ഫയലല്ല. ദയവായി വീണ്ടും ശ്രയിക്കുക.",
+ "Click the refresh button to see the pretrained file in the dropdown menu.": "പ്രീട്രെയിനെഡ് ഫയലെ ഡ്രോപ്പ്ഡൌൺ മെനുവിലെത്താൻ റെഫ്രഷ് ബട്ടൺ ക്ലിക്കുചെയ്യുക.",
+ "Pretrained G Path": "കസ്റ്റം പ്രീട്രെയിനെഡ് G പാത",
+ "Pretrained D Path": "കസ്റ്റം പ്രീട്രെയിനെഡ് D പാത",
+ "GPU Settings": "GPU സെറ്റിംഗുകൾ",
+ "Sets advanced GPU settings, recommended for users with better GPU architecture.": "അട്വാൻസ്ഡ് GPU സെറ്റിംഗുകൾ സജ്ജീവമാക്കുന്നു, പ്രശസ്ത GPU ആർക്കിടെയുള്ള ഉപയോക്താക്കളിനായി ശിഫാരസ് ചെയ്തത്.",
+ "GPU Custom Settings": "GPU കസ്റ്റം സെറ്റിംഗുകൾ",
+ "GPU Number": "GPU നമ്പർ",
+ "0 to ∞ separated by -": "0 മുതൽ ∞ വരെ - ഒടുക്കിയ",
+ "GPU Information": "GPU വിവരം",
+ "Pitch Guidance": "പിച്ച് ഗൈഡൻസ്",
+ "By employing pitch guidance, it becomes feasible to mirror the intonation of the original voice, including its pitch. This feature is particularly valuable for singing and other scenarios where preserving the original melody or pitch pattern is essential.": "പിച്ച് ഗൈഡൻസ് ഉപയോഗിച്ച്, ഓരിജിനൽ വോയ്സിന്റെ ഇൻറോണേഷനെ, അതിന്റെ പിച്ചു സേവനേന്ന്, സോണ്റിംഗ് മുതലായ സന്നിധികളിൽ പ്രാഥമിക സ്വരം അല്ലെങ്കിൽ പിച്ച് നമ്പറെ പരിരക്ഷിക്കേണ്ടതായ സ്ഥിതികളിലേക്ക് മികച്ച ഫലങ്ങൾ പ്രാപ്തമാക്കാനാകുന്നു.",
+ "Utilize pretrained models when training your own. This approach reduces training duration and enhances overall quality.": "നിനക്ക് സ്വന്തമായി പ്രശിക്ഷണം നടത്തുമ്പോൾ പ്രീട്രെയിനെഡ് മോഡലുകൾ ഉപയോഗിക്കുക. ഈ രീതി പ്രശിക്ഷണ സമയം കുറയ്ക്കുന്നുവെങ്കിൽ മൊത്തം ഗുണമേന്മ വരും.",
+ "Extract Features": "ഫീച്ചർ എക്സ്ട്രാക്ട്",
+ "Overtraining Detector": "ഓവർട്രെയിനിംഗ് ഡിറ്റക്റ്റർ",
+ "Detect overtraining to prevent the model from learning the training data too well and losing the ability to generalize to new data.": "പ്രശിക്ഷണ ഡാറ്റയെ നിങ്ങളുടെ മോഡൽ അതിന്റെ തരംതിരിച്ചു പഠിക്കാൻ അനുവദിക്കുന്നത് നിലവിൽ നിന്ന് ഓവർട്രെയിനിംഗ് ശ്രമിക്കുക.",
+ "Overtraining Detector Settings": "ഓവർട്രെയിനിംഗ് ഡിറ്റക്റ്റർ സെറ്റിംഗുകൾ",
+ "Overtraining Threshold": "ഓവർട്രെയിനിംഗ് താഴ്ന്ന മിതം",
+ "Set the maximum number of epochs you want your model to stop training if no improvement is detected.": "യാത്രാവധി പരിശോധിച്ചിട്ടില്ലാത്ത അഭിവൃദ്ധി നിരയെടുക്കുകയാണെങ്കിൽ നിങ്ങളുടെ മോഡൽ പരിശോധനയെന്നത് പ്രശിക്ഷണം നിലനിൽക്കാനുള്ള ഏറ്റവും ഉന്നത എപ്പോക്കുകൾ സജ്ജമാക്കുക.",
+
+ "Start Training": "പ്രശിക്ഷണം ആരംഭിക്കുക",
+ "Stop Training & Restart Applio": "പ്രശിക്ഷണം നിർത്തുക & അപ്ലിയോ പുനഃപ്രാരംഭിക്കുക",
+ "Generate Index": "ഇൻഡെക്സ് സൃഷ്ടിക്കുക",
+
+ "Export Model": "എക്സ്പോർട്ട് മോഡൽ",
+ "The button 'Upload' is only for google colab: Uploads the exported files to the ApplioExported folder in your Google Drive.": "'അപ്ലോഡ്' ബട്ടൺ കേവലം ഗൂഗിൾ കോളാബിന് മാത്രം: നിങ്ങളുടെ ഗൂഗിൾ ഡ്രൈവിലെ ApplioExported ഫോൾഡറിലേക്ക് എക്സ്പോർട്ട് ചെയ്യുന്നു.",
+ "Exported Pth file": "എക്സ്പോർട്ട് ചെയ്ത Pth ഫയൽ",
+ "Exported Index file": "എക്സ്പോർട്ട് ചെയ്ത ഇൻഡെക്സ് ഫയൽ",
+ "Select the pth file to be exported": "എക്സ്പോർട്ട് ചെയ്യാൻ ആദ്യം pth ഫയൽ തിരഞ്ഞെടുക്കുക",
+ "Select the index file to be exported": "എക്സ്പോർട്ട് ചെയ്യാൻ ആദ്യം ഇൻഡെക്സ് ഫയൽ തിരഞ്ഞെടുക്കുക",
+ "Upload": "അപ്ലോഡ്",
+
+ "Voice Model": "വോയ്സ് മോഡൽ",
+ "Select the voice model to use for the conversion.": "കണ്വേർഷനിനായി ഉപയോഗിക്കാൻ വോയ്സ് മോഡലുകൾ തിരഞ്ഞെടുക്കുക.",
+ "Index File": "ഇൻഡെക്സ് ഫയൽ",
+ "Select the index file to use for the conversion.": "കണ്വേർഷനിനായി ഉപയോഗിക്കേണ്ട ഇൻഡെക്സ് ഫയലുകൾ തിരഞ്ഞെടുക്കുക.",
+ "Refresh": "പുനഃസ്വന്തമാക്കുക",
+ "Unload Voice": "വോയ്സ് അൺലോഡ്",
+ "Single": "ഏകത്വം",
+ "Upload Audio": "ഓഡിയോ അപ്ലോഡ് ചെയ്യുക",
+ "Select Audio": "ഓഡിയോ തിരഞ്ഞെടുക്കുക",
+ "Select the audio to convert.": "കണ്വേർട്ട് ചെയ്യാൻ ഓഡിയോ തിരഞ്ഞെടുക്കുക.",
+ "Advanced Settings": "പുതുമയായ സെറ്റിംഗുകൾ",
+ "Clear Outputs (Deletes all audios in assets/audios)": "പരിമാറ്റുക (assets/audios എല്ലാ ഓഡിയോകൾ ഇല്ലാതാക്കുക)",
+ "Custom Output Path": "കസ്റ്റം ഔട്ട്പുട്ട് പാത",
+ "Output Path": "ഔട്ട്പുട്ട് പാത",
+ "The path where the output audio will be saved, by default in assets/audios/output.wav": "ഓട്ട്പുട്ട് ഓഡിയോ സേവ്ചെയ്യപ്പെടുന്നത്, സ്വഭാവമായി assets/audios/output.wav ഇല്",
+ "Split Audio": "ഓഡിയോ വിഭാഗീകരണം",
+ "Split the audio into chunks for inference to obtain better results in some cases.": "അനുമാനത്തിന് കൂടുതൽ ഫലങ്ങൾ ലഭിക്കാൻ ഓഡിയോ ഭാഗങ്ങൾക്ക് വിഭാഗീകരണം ചെയ്യുക.",
+ "Autotune": "ഓട്ടോട്യൂൺ",
+ "Apply a soft autotune to your inferences, recommended for singing conversions.": "സോഫ്റ്റ് ഓട്ടോട്യൂൺ ആപ്ലയുകയും, സിംഗിങ് കൺവേർഷനുകളില് ശിഫാരസ് ചെയ്യുകയും.",
+ "Clean Audio": "ശുദ്ധമായ ഓഡിയോ",
+ "Clean your audio output using noise detection algorithms, recommended for speaking audios.": "ശുദ്ധമായി നോയിസ് ഡിറ്റക്ഷൻ ആൾഗോരിതങ്ങൾ ഉപയോഗിച്ച് നിനക്ക് എത്ര പ്രയോജനപ്രദമായ ഓഡിയോ പരിഷ്കരിക്കാൻ, സ്പീക്കിംഗ് ഓഡിയോക്കിന് ശിഫാരസ് ചെയ്യുക.",
+ "Clean Strength": "ശുദ്ധി ശക്തി",
+ "Set the clean-up level to the audio you want, the more you increase it the more it will clean up, but it is possible that the audio will be more compressed.": "നിനക്ക് അവശ്യമായ ഓഡിയോക്ക് ശുദ്ധിയുടെ നില സജ്ജീവമാക്കുക, അതെക്കും കൂടുതൽ ഉള്ളതും അതിനെക്കുറിച്ച് ചോദിക്കുന്നതെന്തെങ്കിലും ശുദ്ധി ചെയ്തിരിക്കുന്ന ഓഡിയോ കമ്പ്രസ്ഡ് ആയിരിക്കാനുള്ള സാധ്യതയുണ്ട്.",
+ "Pitch": "പിച്ച്",
+ "Set the pitch of the audio, the higher the value, the higher the pitch.": "ഓഡിയോയുടെ പിച്ച് സജ്ജീവമാക്കുക, അതെക്കും ഉയരുന്നുവെങ്കിലും പിച്ച്.",
+ "Filter Radius": "ഫിൽട്ടർ റേഡിയസ്",
+ "If the number is greater than or equal to three, employing median filtering on the collected tone results has the potential to decrease respiration.": "സംഖ്യ 3 അല്ലെങ്കിൽ അതിനേക്കാൾ കൂടുതൽ ആയിരിക്കുന്നാല്, ശ്വസനം കുറയ്ക്കാന് ശേഷിക്കുന്ന രീതിയില് കൂടുതല് കഴിവുള്ളതാണ്.",
+ "Search Feature Ratio": "സേര്ച്ച് ഫീച്ചർ റേഷ്യോ",
+ "Influence exerted by the index file; a higher value corresponds to greater influence. However, opting for lower values can help mitigate artifacts present in the audio.": "ഇനഡെക്സ് ഫയലായി വികസിക്കുന്ന പ്രഭാവം; ഉയര്ന്ന മൂല്യം ഉയരത്തിന് അനുബന്ധ പ്രഭാവമുള്ളതാണ്. എനിക്ക് കുറഞ്ഞ മൂല്യങ്ങളെ അനുവദിക്കാനും ആര്ടിഫാക്ടുകള് നിലവിലുള്ള ഓഡിയോയിലെ ബുദ്ധിമുട്ടുകൾ ഉപയോഗപ്പെടുന്നു.",
+ "Volume Envelope": "വോള്യൂം എൻവലപ്പ്",
+ "Substitute or blend with the volume envelope of the output. The closer the ratio is to 1, the more the output envelope is employed.": "ആഉട്ട്പുട്ട് ഒറ്റവന്നേറ്റത്തിന്റെ വോള്യൂം എൻവലപ്പ് സ്ഥലപ്പെടുത്തുക. അനുഭവം 1-ക്കു സമീപമായിരിക്കുന്നതും, അനുഭവ എൻവലപ്പ് ഉപയോഗപ്പെടുന്നതും കൂടുതൽ ആണ്.",
+ "Protect Voiceless Consonants": "വോയ്സ്ലസ് കോൺസനന്റുകൾ സംരക്ഷിക്കുക",
+ "Safeguard distinct consonants and breathing sounds to prevent electro-acoustic tearing and other artifacts. Pulling the parameter to its maximum value of 0.5 offers comprehensive protection. However, reducing this value might decrease the extent of protection while potentially mitigating the indexing effect.": "എല്ലാവര്ക്കും പ്രകടമായ കോൺസനന്റുകൾ ഒഴുകുന്നത് എന്നതുകൊണ്ടുതന്നെ ഇലക്ട്രോ-ഓക്കുസ്റ്റിക് കൊതിയും മറ്റു ആർട്ടിഫാക്ടുകളും പ്രതിരക്ഷിക്കുന്നതിനുള്ള അരികോട്. പാരാമീറ്ററിനെ അതിന്റെ 0.5 എന്നേക്കും ഉച്ചക്കൊണ്ട് കൂട്ടിക്കൊള്ളൽ സാമൂഹ്യപ്രതിരക്ഷ നൽകുന്നു. എന്നാല്, ഈ മൂല്യം കുറഞ്ഞാക്കാൻ സാധ്യതയുണ്ട്, പ്രതിരക്ഷണം താഴെ കുറഞ്ഞുകൂടാൻ സഹായകരമാവുക.",
+ "Pitch extraction algorithm": "പിച്ച് എക്സ്ട്രാക്ഷൻ ആൾഗോരിതം",
+ "Pitch extraction algorithm to use for the audio conversion. The default algorithm is rmvpe, which is recommended for most cases.": "ഓഡിയോ കൺവേർഷനിനായി ഉപയോഗിക്കേണ്ട പിച്ച് എക്സ്ട്രാക്ഷൻ ആൾഗോരിതം. സ്വതന്ത്ര ആൾഗോരിതത്താണ് rmvpe, അത് പലതരത്തിലുള്ള പ്രസ്താവനകളില് ശിഫാരസ് ചെയ്യപ്പെടുന്നു.",
+
+ "Convert": "കൺവേർട്ട് ചെയ്യുക",
+ "Export Audio": "ഓഡിയോ എക്സ്പോർട്ട് ചെയ്യുക",
+
+ "Batch": "ബാച്ച്",
+ "Input Folder": "ഇൻപുട്ട് ഫോൾഡർ",
+ "Select the folder containing the audios to convert.": "കൺവേർട്ട് ചെയ്യാൻ ഓഡിയോകളെ കാണുന്ന ഫോൾഡർ തിരഞ്ഞെടുക്കുക.",
+ "Enter input path": "ഇൻപുട്ട് പാത നൽകുക",
+ "Output Folder": "ഔട്ട്പുട്ട് ഫോൾഡർ",
+ "Select the folder where the output audios will be saved.": "ഔട്ട്പുട്ട് ഓഡിയോകൾ സേവ്ചെയ്യപ്പെടുന്ന ഫോൾഡർ തിരഞ്ഞെടുക്കുക.",
+ "Enter output path": "ഔട്ട്പുട്ട് പാത നൽകുക",
+
+ "Get information about the audio": "ഓഡിയോയുടെ കുറിപ്പ് നേടുക",
+
+ "## Voice Blender": "## വോയ്സ് ബ്ലെന്ഡർ",
+ "Select two voice models, set your desired blend percentage, and blend them into an entirely new voice.": "രണ്ട് വോയ്സ് മോഡലുകൾ തിരഞ്ഞെടുക്കുക, നിനക്ക് ആഗ്രഹിക്കുന്ന ബ്ലെന്റ് ശതകം സജ്ജീവമാക്കുക, അവയുടെ ബ്ലെന്റും പൂർണ്ണമായും പുതിയ ഒരു വോയ്സായാക്കുക.",
+ "Voice Blender": "വോയ്സ് ബ്ലെന്ഡർ",
+ "Drag and drop your model here": "നിനക്ക് ശൈലിക്കുകയോരോ മോഡൽ ഇവിടെ വികസിപ്പിക്കുക",
+ "You can also use a custom path.": "നിനക്ക് ഒരു സ്വന്തമായ പാതയും ഉപയോഗിക്കാം.",
+ "Blend Ratio": "ബ്ലെന്റ് അനുപാതം",
+ "Adjusting the position more towards one side or the other will make the model more similar to the first or second.": "സ്ഥാനം കൊണ്ടുകൂടുതൽ പ്രതിരൂപമാക്കാൻ മുന്നേറ്റം ഒന്നിലേറ്റെത്തിനു അല്ലെങ്കിൽ മറ്റൊന്നിലേറ്റെത്തിനു സാധിക്കും.",
+ "Fusion": "ഫ്യൂഷൻ",
+ "Path to Model": "മോഡലിന്റെ പാത",
+ "Enter path to model": "മോഡലിനെ സജ്ജീവമാക്കാൻ പാത നൽകുക",
+ "Model information to be placed": "പ്ലേസ്മെന്റ് ചെയ്യാൻ ആവശ്യമായ മോഡലിന്റെ വിവരം",
+ "Introduce the model information": "മോഡലിന്റെ വിവരം പരിചയപ്പെടുക",
+ "The information to be placed in the model (You can leave it blank or put anything).": "മോഡലില് സ്ഥലപ്പെടുത്താനുള്ള വിവരം (നിനക്ക് വിടാവുകയും അല്ലെങ്കിൽ എന്തെങ്കിലും ചേരുകയും ചെയ്യാം).",
+ "View model information": "മോഡലിന്റെ വിവരം കാണുക",
+ "Introduce the model pth path": "മോഡലിന്റെ pth പാത പരിചയപ്പെടുക",
+ "View": "കാണുക",
+ "Model extraction": "മോഡൽ എക്സ്ട്രാക്ഷൻ",
+ "Model conversion": "മോഡൽ കൺവേർഷൻ",
+ "Pth file": "Pth ഫയൽ",
+ "Output of the pth file": "Pth ഫയലിന്റെ പ്രോഡക്റ്റ്",
+ "# How to Report an Issue on GitHub": "# GitHub-ലെ ഒരു ഇഷ്യൂ റിപ്പോർട്ട് ചെയ്യുന്നതിനുള്ള രീതി",
+ "1. Click on the 'Record Screen' button below to start recording the issue you are experiencing.": "1. നിന്റെ അനുഭവപ്പെടുന്ന ഇഷ്യൂ റെക്കോർഡുചെയ്യുന്നതിന് താഴെ 'Record Screen' ബട്ടൺ ക്ലിക്കുചെയ്യുക.",
+ "2. Once you have finished recording the issue, click on the 'Stop Recording' button (the same button, but the label changes depending on whether you are actively recording or not).": "2. ഇഷ്യൂ റെക്കോർഡുചെയ്തുതീർക്കുന്നതിനുശേഷം, 'Stop Recording' ബട്ടൺ ക്ലിക്കുചെയ്യുക (അത് തുടർന്നിരിക്കുന്നുമോ എന്ന് താഴെയോ കാണുന്ന ലേബല് അനുസരിച്ച് മാറുന്നു).",
+ "3. Go to [GitHub Issues](https://github.com/IAHispano/Applio/issues) and click on the 'New Issue' button.": "3. [GitHub Issues](https://github.com/IAHispano/Applio/issues) സ്ഥലത്തേക്ക് പോകുക, 'New Issue' ബട്ടൺ ക്ലിക്കുചെയ്യുക.",
+ "4. Complete the provided issue template, ensuring to include details as needed, and utilize the assets section to upload the recorded file from the previous step.": "4. നൽകിയ ഇഷ്യൂ ടെംപ്ലേറ്റ് പൂർത്തിയാക്കുക, ആവശ്യമായ വിവരങ്ങളെ ചേർക്കുന്നതിനുശേഷം, പഴയ ഘടനയിൽ റെക്കോർഡുചെയ്ത ഫയൽ അപ്ലോഡ് ചെയ്യുന്നതിന് എസെറ്റ് വിഭാഗം ഉപയോഗിക്കുക.",
+ "Record Screen": "റെക്കോർഡ് സ്ക്രീൻ",
+ "Record": "റെക്കോർഡ്",
+ "Stop Recording": "റെക്കോർഡുനിർത്തുക",
+ "Introduce the model .pth path": "മോഡൽ .pth പാത പരിചയപ്പെടുക",
+ "See Model Information": "മോഡൽ വിവരങ്ങൾ കാണുക",
+ "## Download Model": "## മോഡൽ ഡൗൺലോഡ്",
+ "Model Link": "മോഡൽ ലിങ്ക്",
+ "Introduce the model link": "മോഡൽ ലിങ്ക് പരിചയപ്പെടുക",
+ "Download Model": "മോഡൽ ഡൗൺലോഡ്",
+ "## Drop files": "## ഫയലുകൾ ഇടുക",
+ "## Search Model": "## മോഡൽ തിരയൽ",
+ "Search": "തിരയൽ",
+ "Introduce the model name to search.": "തിരയുന്നതിനായി മോഡൽ പേര് അറിയിക്കുക.",
+ "We couldn't find models by that name.": "അനുബന്ധമായ പേരിൽ മോഡൽസ് കണ്ടെത്താനായില്ല.",
+
+ "Drag your .pth file and .index file into this space. Drag one and then the other.": "നിനക്ക് .pth ഫയലുകളും .index ഫയലുകളും ഇവിടെ ഡ്രാഗ് ചെയ്യുക. ഒന്നുകിട്ട് പിന്നെ മറ്റൊന്നു ഡ്രാഗ് ചെയ്യുക.",
+ "TTS Voices": "TTS വോയ്സുകൾ",
+ "Select the TTS voice to use for the conversion.": "മാറ്റത്തിനായി ഉപയോഗിക്കാൻ TTS വോയ്സ് തിരഞ്ഞെടുക്കുക.",
+ "Text to Synthesize": "സിന്തിയസൈസ് ചെയ്യുന്ന ടെക്സ്റ്റ്",
+ "Enter the text to synthesize.": "സിന്തിയസൈസ് ചെയ്യാൻ ടെക്സ്റ്റ് നൽകുക.",
+ "Or you can upload a .txt file": "അല്ലെങ്കിൽ .txt ഫയൽ അപ്ലോഡ് ചെയ്യാം",
+ "Enter text to synthesize": "സിന്തിയസൈസ് ചെയ്യാൻ ടെക്സ്റ്റ് നൽകുക",
+ "Output Path for TTS Audio": "TTS ഓഡിയോക്ക് ഔട്ട്പുട്ട് പാത",
+ "Output Path for RVC Audio": "RVC ഓഡിയോക്ക് ഔട്ട്പുട്ട് പാത",
+ "Enable Applio integration with Discord presence": "Discord പ്രസന്നതയോട് Applio ഇൻറഗ്രേഷൻ സജീവമാക്കുക",
+ "It will activate the possibility of displaying the current Applio activity in Discord.": "ഇത് Discord-നായിരിക്കുന്ന നിലാവ് കാണാനുള്ള സാധ്യത സജീവമാക്കും.",
+ "Enable Applio integration with applio.org/models using flask": "flask ഉപയോഗിച്ച് applio.org/models ഇൻറഗ്രേഷൻ Applio സജീവമാക്കുക",
+ "It will activate the possibility of downloading models with a click from the website.": "ഇത് വെബ്സൈറ്റിൽ ഒരു ക്ലിക്ക് ചെയ്യുമ്പോൾ മോഡലുകൾ ഡൗൺലോഡ് ചെയ്യാനുള്ള സാധ്യത സജീവമാക്കും.",
+ "Enable fake GPU": "വഞ്ചി ജിപിയു ഇയക്കുക",
+ "Training is currently unsupported due to the absence of a GPU. To activate the training tab, navigate to the settings tab and enable the 'Fake GPU' option.": "പ്രശിക്ഷണം തറന്ന് നിലവിലുള്ളതിന് ജിപിയു ഇല്ലാതെ പ്രസ്താവിക്കുന്നതിനായി തിരഞ്ഞെടുക്കുന്നത് അനുവദിക്കാൻ 'ഫെയ്ക് ജിപിയു' ഓപ്ഷൻ സജ്ജമാക്കുക എന്ന് ക്രമീകരിക്കാൻ തിരിച്ചുവരുക. ",
+ "Activates the train tab. However, please note that this device lacks GPU capabilities, hence training is not supported. This option is only for testing purposes. (This option will restart Applio)": "പ്രശിക്ഷണം തുടങ്ങുന്നു. എങ്കിലും, ദയവായി ശ്രദ്ധിക്കുക എന്നത് നിങ്ങളുടെ ഉപകരണത്തിൽ GPU സാധ്യതകൾ ഇല്ലാത്തതാണ്, അതിനാൽ പ്രശിക്ഷണം അനുവദനീയമല്ല. ഈ ഓപ്ഷൻ ഇപ്പോൾ പരീക്ഷണങ്ങളിക്കായാണ്. (ഈ ഓപ്ഷൻ അപ്ലിയോ പുനഃസജ്ജമാക്കും)",
+ "Theme": "തീം",
+ "Select the theme you want to use. (Requires restarting Applio)": "നിനക്ക് ഉപയോഗിക്കാൻ എന്താണെന്ന് നിങ്ങളുടെ തീം തിരഞ്ഞെടുക്കുക. (Applio പുനഃസജീവമാക്കാൻ ആവശ്യമാണ്)",
+ "Language": "ഭാഷ",
+ "Select the language you want to use. (Requires restarting Applio)": "നിങ്ങളുടെ ഉപയോഗത്തിന് നിങ്ങളുടെ ഭാഷ തിരഞ്ഞെടുക്കുക. (Applio പുനഃസജീവമാക്കാൻ ആവശ്യമാണ്)",
+ "Plugin Installer": "പ്ലഗിൻ ഇൻസ്റ്റാൾലർ",
+ "Drag your plugin.zip to install it": "അതിനായി നിനക്ക് നിന്നെത്തിയ .zip ഫയല് ഇൻസ്റ്റാൾ ചെയ്യാൻ ഇവിടെ ഡ്രാഗ് ചെയ്യുക",
+ "Version Checker": "വേര്ഷന് ചെക്കർ",
+ "Check which version of Applio is the latest to see if you need to update.": "നിനക്ക് അപ്ഡേറ്റുചെയ്യേണ്ടതോ എന്ന് അപ്ലിയോയുടെ ഏറ്റവും പുതിയ പതിപ്പായത് പരിശോധിക്കുക.",
+ "Check for updates": "അപ്ഡേറ്റുകൾ പരിശോധിക്കുക"
+}
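
Because lookups match the English key byte for byte, every locale file must carry identical keys. The two files above already drift: ko_KO.json spells one key "Determine at how many epochs the model will saved at." while ml_IN.json spells it "Determine at how many epochs the model will be saved at.", so at most one of the two can match the string the UI actually uses. Here is a hedged sketch for catching such drift (the choice of reference locale and the report format are assumptions):

```python
import json
from pathlib import Path

LANG_DIR = Path("assets/i18n/languages")  # path taken from this diff

def check_keys(reference: str = "ko_KO") -> None:
    """Report keys that differ between each locale file and a reference one."""
    ref_keys = set(json.loads((LANG_DIR / f"{reference}.json").read_text(encoding="utf-8")))
    for path in sorted(LANG_DIR.glob("*.json")):
        keys = set(json.loads(path.read_text(encoding="utf-8")))
        missing, extra = ref_keys - keys, keys - ref_keys
        if missing or extra:
            print(f"{path.name}: {len(missing)} missing, {len(extra)} unexpected")
            for key in sorted(missing | extra):
                print(f"  {'-' if key in missing else '+'} {key!r}")

if __name__ == "__main__":
    check_keys()
```
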
diff --git a/assets/i18n/languages/mr_MR.json b/assets/i18n/languages/mr_MR.json
new file mode 100644
index 0000000000000000000000000000000000000000..fd5726fa11e1b34cc32648f0279acdfecacb5157
--- /dev/null
+++ b/assets/i18n/languages/mr_MR.json
@@ -0,0 +1,175 @@
+{
+ "VITS-based Voice Conversion focused on simplicity, quality and performance.": "अल्टिमेट व्हॉइस क्लोनिंग टूल, अप्रतिम शक्ती, मॉड्युलरिटी आणि वापरकर्ता-अनुकूल अनुभवासाठी काळजीपूर्वक ऑप्टिमाइझ केलेले.",
+ "This section contains some extra utilities that often may be in experimental phases.": "या विभागात काही अतिरिक्त उपयोगिता आहेत ज्या बर्याचदा प्रायोगिक टप्प्यात असू शकतात.",
+ "Output Information": "आउटपुट माहिती",
+ "The output information will be displayed here.": "आउटपुट माहिती येथे प्रदर्शित केली जाईल.",
+ "Inference": "निष्कर्ष",
+ "Train": "आगगाडी",
+ "Extra": "अतिरिक्त",
+ "Merge Audios": "Merges Tऑडिओ",
+ "Processing": "प्रोसेसिंग",
+ "Audio Analyzer": "ऑडिओ विश्लेषक",
+ "Model Information": "मॉडेल माहिती",
+ "Plugins": "प्लगइन्स",
+ "Download": "डाउनलोड करा",
+ "Report a Bug": "बग ची नोंद करा",
+ "Settings": "सेटिंग्स",
+ "Preprocess": "पूर्वप्रक्रिया",
+ "Model Name": "मॉडेलचे नाव",
+ "Name of the new model.": "नवीन मॉडेलचे नाव .",
+ "Enter model name": "मॉडेल नाव प्रविष्ट करा",
+ "Dataset Path": "डेटासेट पथ",
+ "Path to the dataset folder.": "डेटासेट फोल्डरचा मार्ग.",
+ "Refresh Datasets": "डेटासेट रिफ्रेश करा",
+ "Dataset Creator": "डेटासेट निर्माता",
+ "Dataset Name": "डेटासेट चे नाव",
+ "Name of the new dataset.": "नवीन डेटासेटचे नाव.",
+ "Enter dataset name": "डेटासेट नाव प्रविष्ट करा",
+ "Upload Audio Dataset": "ऑडिओ डेटासेट अपलोड करा",
+ "The audio file has been successfully added to the dataset. Please click the preprocess button.": "ऑडिओ फाइल डेटासेटमध्ये यशस्वीरित्या जोडली गेली आहे. कृपया प्रीप्रोसेस बटणावर क्लिक करा.",
+ "Enter dataset path": "डेटासेट मार्ग प्रविष्ट करा",
+ "Sampling Rate": "नमुना दर",
+ "The sampling rate of the audio files.": "ऑडिओ फायलींचे नमुने घेण्याचा दर.",
+ "Model Architecture": "आरव्हीसी आवृत्ती",
+ "Version of the model architecture.": "मॉडेलची आरव्हीसी आवृत्ती.",
+ "Preprocess Dataset": "Preprocess Dataset",
+ "Extract": "अर्क",
+ "Hop Length": "हॉप लांबी",
+ "Denotes the duration it takes for the system to transition to a significant pitch change. Smaller hop lengths require more time for inference but tend to yield higher pitch accuracy.": "प्रणालीला महत्त्वपूर्ण खेळपट्टी बदलाकडे संक्रमण होण्यासाठी लागणारा कालावधी दर्शवितो. लहान हॉप लांबीसाठी अनुमानासाठी अधिक वेळ लागतो परंतु खेळपट्टीची अचूकता जास्त असते.",
+ "Batch Size": "बॅच आकार",
+ "It's advisable to align it with the available VRAM of your GPU. A setting of 4 offers improved accuracy but slower processing, while 8 provides faster and standard results.": "हे आपल्या जीपीयूच्या उपलब्ध व्हीआरएएमसह संरेखित करण्याचा सल्ला दिला जातो. 4 ची सेटिंग सुधारित अचूकता परंतु हळू प्रक्रिया प्रदान करते, तर 8 वेगवान आणि मानक परिणाम प्रदान करते.",
+ "Save Every Epoch": "प्रत्येक युग वाचवा",
+ "Determine at how many epochs the model will saved at.": "मॉडेल किती युगात जतन करेल हे ठरवा.",
+ "Total Epoch": "एकूण युग",
+ "Specifies the overall quantity of epochs for the model training process.": "मॉडेल प्रशिक्षण प्रक्रियेसाठी युगांची एकूण मात्रा निर्दिष्ट करते.",
+ "Pretrained": "पूर्वप्रशिक्षित",
+ "Save Only Latest": "फक्त लेटेस्ट सेव्ह करा",
+ "Enabling this setting will result in the G and D files saving only their most recent versions, effectively conserving storage space.": "ही सेटिंग सक्षम केल्याने जी आणि डी फायली केवळ त्यांच्या अलीकडील आवृत्त्या वाचवतील आणि स्टोरेज स्पेस प्रभावीपणे जतन करतील.",
+ "Save Every Weights": "प्रत्येक वजन वाचवा",
+ "This setting enables you to save the weights of the model at the conclusion of each epoch.": "ही सेटिंग आपल्याला प्रत्येक युगाच्या शेवटी मॉडेलची वजने वाचविण्यास सक्षम करते.",
+ "Custom Pretrained": "सानुकूल पूर्वप्रशिक्षित",
+ "Utilizing custom pretrained models can lead to superior results, as selecting the most suitable pretrained models tailored to the specific use case can significantly enhance performance.": "सानुकूल पूर्वप्रशिक्षित मॉडेल्स चा वापर केल्यास उत्कृष्ट परिणाम मिळू शकतात, कारण विशिष्ट वापर प्रकरणानुसार सर्वात योग्य पूर्वप्रशिक्षित मॉडेल्स ची निवड केल्यास कार्यक्षमता लक्षणीय वाढू शकते.",
+ "Upload Pretrained Model": "पूर्वप्रशिक्षित मॉडेल अपलोड करा",
+ "Refresh Custom Pretraineds": "रिफ्रेश सानुकूल पूर्वप्रशिक्षित",
+ "Pretrained Custom Settings": "पूर्वप्रशिक्षित सानुकूल सेटिंग्ज",
+ "The file you dropped is not a valid pretrained file. Please try again.": "आपण टाकलेली फाईल वैध पूर्वप्रशिक्षित फाइल नाही. कृपया पुन्हा प्रयत्न करा.",
+ "Click the refresh button to see the pretrained file in the dropdown menu.": "ड्रॉपडाउन मेनूमध्ये पूर्वप्रशिक्षित फाइल पाहण्यासाठी रिफ्रेश बटणावर क्लिक करा.",
+ "Pretrained G Path": "सानुकूल पूर्वप्रशिक्षित जी",
+ "Pretrained D Path": "सानुकूल पूर्वप्रशिक्षित डी",
+ "GPU Settings": "जीपीयू सेटिंग्स",
+ "Sets advanced GPU settings, recommended for users with better GPU architecture.": "प्रगत जीपीयू सेटिंग्ज सेट करा, चांगल्या जीपीयू आर्किटेक्चर असलेल्या वापरकर्त्यांसाठी शिफारस केली आहे.",
+ "GPU Custom Settings": "जीपीयू सानुकूल सेटिंग्स",
+ "GPU Number": "जीपीयू नंबर",
+ "0 to ∞ separated by -": "0 ते ∞ वेगळे केले जातात -",
+ "GPU Information": "जीपीयू माहिती",
+ "Pitch Guidance": "खेळपट्टी मार्गदर्शन",
+ "By employing pitch guidance, it becomes feasible to mirror the intonation of the original voice, including its pitch. This feature is particularly valuable for singing and other scenarios where preserving the original melody or pitch pattern is essential.": "खेळपट्टीमार्गदर्शनाचा वापर करून मूळ आवाजाच्या खेळपट्टीसह त्याच्या अंतर्मुखतेचे प्रतिबिंब उमटविणे शक्य होते. हे वैशिष्ट्य गायन आणि इतर दृश्यांसाठी विशेषतः मौल्यवान आहे जिथे मूळ सूर किंवा पिच पॅटर्न जतन करणे आवश्यक आहे.",
+ "Utilize pretrained models when training your own. This approach reduces training duration and enhances overall quality.": "स्वत: चे प्रशिक्षण देताना पूर्वप्रशिक्षित मॉडेल्स वापरा. हा दृष्टिकोन प्रशिक्षण कालावधी कमी करतो आणि एकंदर गुणवत्ता वाढवतो.",
+ "Extract Features": "अर्क वैशिष्ट्ये",
+ "Start Training": "प्रशिक्षण सुरू करा",
+ "Generate Index": "इंडेक्स तयार करा",
+ "Voice Model": "व्हॉइस मॉडेल",
+ "Select the voice model to use for the conversion.": "रूपांतरणासाठी वापरण्यासाठी व्हॉइस मॉडेल निवडा.",
+ "Index File": "अनुक्रमणिका फाइल",
+ "Select the index file to use for the conversion.": "रूपांतरणासाठी वापरण्यासाठी अनुक्रमणिका फाइल निवडा.",
+ "Refresh": "टवटवी आणणे",
+ "Unload Voice": "आवाज अनलोड करा",
+ "Single": "अविवाहित",
+ "Upload Audio": "ऑडिओ अपलोड करा",
+ "Select Audio": "ऑडिओ निवडा",
+ "Select the audio to convert.": "रूपांतरित करण्यासाठी ऑडिओ निवडा.",
+ "Advanced Settings": "प्रगत सेटिंग्ज",
+ "Clear Outputs (Deletes all audios in assets/audios)": "स्पष्ट आउटपुट (मालमत्ता / ऑडिओमधील सर्व ऑडिओ हटवतात)",
+ "Custom Output Path": "सानुकूल आउटपुट पथ",
+ "Output Path": "आउटपुट पथ",
+ "The path where the output audio will be saved, by default in assets/audios/output.wav": "तो मार्ग जिथे आउटपुट ऑडिओ जतन केला जाईल, मालमत्ता / ऑडिओ / output.wav मध्ये डिफॉल्टद्वारे",
+ "Split Audio": "स्प्लिट ऑडिओ",
+ "Split the audio into chunks for inference to obtain better results in some cases.": "काही प्रकरणांमध्ये चांगले परिणाम मिळविण्यासाठी अनुमानासाठी ऑडिओचे तुकडे करा.",
+ "Autotune": "Autotune",
+ "Apply a soft autotune to your inferences, recommended for singing conversions.": "गायन रूपांतरणासाठी शिफारस केलेल्या आपल्या निष्कर्षांवर सॉफ्ट ऑटोट्यून लागू करा.",
+ "Clean Audio": "स्वच्छ ऑडिओ",
+ "Clean your audio output using noise detection algorithms, recommended for speaking audios.": "ध्वनी शोध अल्गोरिदम वापरुन आपले ऑडिओ आउटपुट स्वच्छ करा, ऑडिओ बोलण्यासाठी शिफारस केली आहे.",
+ "Clean Strength": "स्वच्छ शक्ती",
+ "Set the clean-up level to the audio you want, the more you increase it the more it will clean up, but it is possible that the audio will be more compressed.": "आपल्याला हव्या असलेल्या ऑडिओमध्ये क्लीन-अप पातळी सेट करा, आपण ते जितके वाढवाल तितके ते साफ होईल, परंतु ऑडिओ अधिक संकुचित होण्याची शक्यता आहे.",
+ "Pitch": "खेळपट्टी",
+ "Set the pitch of the audio, the higher the value, the higher the pitch.": "ऑडिओची खेळपट्टी सेट करा, किंमत जितकी जास्त तितकी खेळपट्टी जास्त.",
+ "Filter Radius": "फिल्टर त्रिज्या",
+ "If the number is greater than or equal to three, employing median filtering on the collected tone results has the potential to decrease respiration.": "जर संख्या तीनपेक्षा जास्त किंवा समान असेल तर संकलित टोन परिणामांवर मध्यम फिल्टरिंग वापरल्यास श्वसन कमी होण्याची क्षमता असते.",
+ "Search Feature Ratio": "शोध वैशिष्ट्य गुणोत्तर",
+ "Influence exerted by the index file; a higher value corresponds to greater influence. However, opting for lower values can help mitigate artifacts present in the audio.": "अनुक्रमणिका संचिकेने टाकलेला प्रभाव; उच्च मूल्य अधिक प्रभावाशी संबंधित आहे. तथापि, कमी मूल्यांची निवड केल्याने ऑडिओमध्ये असलेल्या कलाकृती कमी होण्यास मदत होते.",
+ "Volume Envelope": "Volume लिफाफा",
+ "Substitute or blend with the volume envelope of the output. The closer the ratio is to 1, the more the output envelope is employed.": "आउटपुटच्या व्हॉल्यूम लिफाफ्यासह पर्याय किंवा मिश्रण करा. गुणोत्तर 1 च्या जितके जवळ असेल तितके आउटपुट लिफाफा वापरला जातो.",
+ "Protect Voiceless Consonants": "आवाजहीन व्यंजनांचे रक्षण करा",
+ "Safeguard distinct consonants and breathing sounds to prevent electro-acoustic tearing and other artifacts. Pulling the parameter to its maximum value of 0.5 offers comprehensive protection. However, reducing this value might decrease the extent of protection while potentially mitigating the indexing effect.": "इलेक्ट्रो-ध्वनिक फाटणे आणि इतर कलाकृती टाळण्यासाठी विशिष्ट व्यंजने आणि श्वासोच्छवासाच्या आवाजांचे रक्षण करा. पॅरामीटरला त्याच्या 0.5 च्या जास्तीत जास्त मूल्यावर खेचणे व्यापक संरक्षण प्रदान करते. तथापि, हे मूल्य कमी केल्याने अनुक्रमणिका प्रभाव संभाव्यत: कमी करताना संरक्षणाची व्याप्ती कमी होऊ शकते.",
+ "Pitch extraction algorithm": "पिच निष्कर्षण अल्गोरिदम",
+ "Pitch extraction algorithm to use for the audio conversion. The default algorithm is rmvpe, which is recommended for most cases.": "ऑडिओ रूपांतरणासाठी वापरण्यासाठी पिच एक्सट्रॅक्शन अल्गोरिदम. डिफॉल्ट अल्गोरिदम आरएमव्हीपीई आहे, ज्याची शिफारस बहुतेक प्रकरणांमध्ये केली जाते.",
+ "Convert": "धर्मांतर करा",
+ "Export Audio": "निर्यात ऑडिओ",
+ "Batch": "तुकडी",
+ "Input Folder": "इनपुट फोल्डर",
+ "Select the folder containing the audios to convert.": "रूपांतरित करण्यासाठी ऑडिओ असलेले फोल्डर निवडा.",
+ "Enter input path": "इनपुट पथ प्रविष्ट करा",
+ "Output Folder": "आउटपुट फोल्डर",
+ "Select the folder where the output audios will be saved.": "फोल्डर निवडा जेथे आउटपुट ऑडिओ सेव्ह केले जातील.",
+ "Enter output path": "आउटपुट पथ प्रविष्ट करा",
+ "Get information about the audio": "ऑडिओबद्दल माहिती मिळवा",
+ "Information about the audio file": "ऑडिओ फाईलची माहिती",
+ "Waiting for information...": "माहितीच्या प्रतीक्षेत...",
+ "## Voice Blender": "## व्हॉइस ब्लेंडर",
+ "Select two voice models, set your desired blend percentage, and blend them into an entirely new voice.": "दोन व्हॉईस मॉडेल निवडा, आपली इच्छित मिश्रण टक्केवारी सेट करा आणि त्यांना पूर्णपणे नवीन आवाजात मिसळा.",
+ "Voice Blender": "व्हॉइस ब्लेंडर",
+ "Drag and drop your model here": "आपले मॉडेल येथे खेचून टाका आणि सोडा",
+ "You can also use a custom path.": "आपण सानुकूल मार्ग देखील वापरू शकता.",
+ "Blend Ratio": "मिश्रण गुणोत्तर",
+ "Adjusting the position more towards one side or the other will make the model more similar to the first or second.": "एका किंवा दुसर्या बाजूला स्थिती अधिक समायोजित केल्याने मॉडेल पहिल्या किंवा दुसर्या बाजूला अधिक समान होईल.",
+ "Fusion": "फ्यूजन",
+ "Path to Model": "मॉडेल चा मार्ग",
+ "Enter path to model": "मॉडेलचा मार्ग प्रविष्ट करा",
+ "Model information to be placed": "मॉडेल माहिती ठेवली जाईल",
+ "Inroduce the model information": "मॉडेल ची माहिती सादर करा",
+ "The information to be placed in the model (You can leave it blank or put anything).": "मॉडेलमध्ये ठेवली जाणारी माहिती (आपण ती रिकामी ठेवू शकता किंवा काहीही टाकू शकता).",
+ "View model information": "मॉडेल माहिती पहा",
+ "Introduce the model pth path": "मॉडेल पीटीएच पथ सादर करा",
+ "View": "पहा",
+ "Model extraction": "मॉडेल निष्कर्षण",
+ "Model conversion": "मॉडेल रूपांतरण",
+ "Pth file": "पीटीएच फाईल",
+ "Output of the pth file": "पीटीएच फाईलचे आउटपुट",
+ "# How to Report an Issue on GitHub": "# गिटहबवर एखाद्या समस्येची नोंद कशी करावी",
+ "1. Click on the 'Record Screen' button below to start recording the issue you are experiencing.": "1. आपण अनुभवत असलेल्या समस्येचे रेकॉर्डिंग सुरू करण्यासाठी खालील 'रेकॉर्ड स्क्रीन' बटणावर क्लिक करा.",
+ "2. Once you have finished recording the issue, click on the 'Stop Recording' button (the same button, but the label changes depending on whether you are actively recording or not).": "२. अंक रेकॉर्ड िंग पूर्ण झाल्यावर 'स्टॉप रेकॉर्डिंग' बटणावर क्लिक करा (तेच बटण, पण तुम्ही सक्रियपणे रेकॉर्डिंग करत आहात की नाही यावर अवलंबून लेबल बदलते).",
+ "3. Go to [GitHub Issues](https://github.com/IAHispano/Applio/issues) and click on the 'New Issue' button.": "3. [गिटहब इश्यूज] (https://github.com/IAHispano/Applio/issues) वर जा आणि 'न्यू इश्यू' बटणावर क्लिक करा.",
+ "4. Complete the provided issue template, ensuring to include details as needed, and utilize the assets section to upload the recorded file from the previous step.": "4. प्रदान केलेले इश्यू टेम्पलेट पूर्ण करा, आवश्यकतेनुसार तपशील समाविष्ट करण्याची खात्री करा आणि मागील चरणातून रेकॉर्ड केलेली फाइल अपलोड करण्यासाठी मालमत्ता विभागाचा वापर करा.",
+ "Record Screen": "रेकॉर्ड स्क्रीन",
+ "Record": "नोंदणे",
+ "Stop Recording": "रेकॉर्डिंग थांबवा",
+ "Introduce the model .pth path": "मॉडेल .पीटीएच पथ सादर करा",
+ "See Model Information": "मॉडेल माहिती पहा",
+ "## Download Model": "## मॉडेल डाऊनलोड करा",
+ "Model Link": "मॉडेल लिंक",
+ "Introduce the model link": "मॉडेल लिंक ची ओळख करून द्या",
+ "Download Model": "मॉडेल डाउनलोड करा",
+ "## Drop files": "## फाइल्स ड्रॉप करा",
+ "Drag your .pth file and .index file into this space. Drag one and then the other.": "आपली .pth फाइल आणि .अनुक्रमणिका फाईल या जागेत खेचून घ्या. एकाला ओढून घ्या आणि मग दुसरं.",
+ "TTS Voices": "टीटीएस आवाज",
+ "Select the TTS voice to use for the conversion.": "रूपांतरणासाठी वापरण्यासाठी टीटीएस आवाज निवडा.",
+ "Text to Synthesize": "संश्लेषित करण्यासाठी मजकूर",
+ "Enter the text to synthesize.": "संश्लेषित करण्यासाठी मजकूर प्रविष्ट करा.",
+ "Or you can upload a .txt file": "किंवा तुम्ही .txt फाईल अपलोड करू शकता",
+ "Enter text to synthesize": "संश्लेषित करण्यासाठी मजकूर प्रविष्ट करा",
+ "Output Path for TTS Audio": "टीटीएस ऑडिओसाठी आउटपुट पथ",
+ "Output Path for RVC Audio": "आरव्हीसी ऑडिओसाठी आउटपुट पथ",
+ "Enable Applio integration with Discord presence": "डिस्कॉर्ड उपस्थितीसह अनुप्रयोग एकीकरण सक्षम करा",
+ "It will activate the possibility of displaying the current Applio activity in Discord.": "हे डिस्कॉर्डमध्ये सध्याची ऍप्लिओ क्रियाकलाप प्रदर्शित करण्याची शक्यता सक्रिय करेल.",
+ "Enable Applio integration with applio.org/models using flask": "फ्लास्क वापरुन applio.org/models अनुप्रयोगा एकीकरण सक्षम करा",
+ "It will activate the possibility of downloading models with a click from the website.": "हे वेबसाइटवरून एका क्लिकवर मॉडेल्स डाउनलोड करण्याची शक्यता सक्रिय करेल.",
+ "Theme": "थीम :",
+ "Select the theme you want to use. (Requires restarting Applio)": "आपण वापरू इच्छित थीम निवडा. (अनुप्रयोग पुन्हा सुरू करणे आवश्यक आहे)",
+ "Language": "भाषा",
+ "Select the language you want to use. (Requires restarting Applio)": "आपल्याला जी भाषा वापरायची आहे ती निवडा. (अनुप्रयोग पुन्हा सुरू करणे आवश्यक आहे)",
+ "Plugin Installer": "Plugin Installer",
+ "Drag your plugin.zip to install it": "ते स्थापित करण्यासाठी आपले plugin.zip खेचून घ्या",
+ "Version Checker": "Version Checker",
+ "Check which version of Applio is the latest to see if you need to update.": "आपल्याला अद्ययावत करण्याची आवश्यकता आहे की नाही हे पाहण्यासाठी अ ॅप्लिओची कोणती आवृत्ती नवीनतम आहे ते पहा.",
+ "Check for updates": "अपडेट्ससाठी पहा"
+}
\ No newline at end of file
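
A handful of entries in these files carry the English string over unchanged. Some of that is deliberate (for example "Autotune" stays "Autotune"), but an identical key/value pair is also exactly what an untranslated gap looks like, so a quick report gives translators a worklist. A small sketch under the same path assumption as above:

```python
import json
from pathlib import Path

# List entries whose value equals the English key, so deliberate carry-overs
# ("Autotune") can be separated from genuinely untranslated strings.
for path in sorted(Path("assets/i18n/languages").glob("*.json")):
    data = json.loads(path.read_text(encoding="utf-8"))
    untranslated = [key for key, value in data.items() if key == value]
    if untranslated:
        print(f"{path.name}: {len(untranslated)} identical key/value pairs")
        for key in untranslated:
            print(f"  {key!r}")
```
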
diff --git a/assets/i18n/languages/ms_MS.json b/assets/i18n/languages/ms_MS.json
new file mode 100644
index 0000000000000000000000000000000000000000..4c89bed5a31a202bb9ffb14bda4d774ea190d64a
--- /dev/null
+++ b/assets/i18n/languages/ms_MS.json
@@ -0,0 +1,175 @@
+{
+ "VITS-based Voice Conversion focused on simplicity, quality and performance.": "Alat pengklonan suara muktamad, dioptimumkan dengan teliti untuk kuasa yang tiada tandingan, modulariti, dan pengalaman mesra pengguna.",
+ "This section contains some extra utilities that often may be in experimental phases.": "Bahagian ini mengandungi beberapa utiliti tambahan yang selalunya berada dalam fasa percubaan.",
+ "Output Information": "Maklumat Output",
+ "The output information will be displayed here.": "Maklumat output akan dipaparkan di sini.",
+ "Inference": "Inferens",
+ "Train": "Kereta api",
+ "Extra": "Tambahan",
+ "Merge Audios": "Mencantumkan Audio",
+ "Processing": "Pemprosesan",
+ "Audio Analyzer": "Penganalisis Audio",
+ "Model Information": "Maklumat Model",
+ "Plugins": "Plugin",
+ "Download": "Muat turun",
+ "Report a Bug": "Laporkan pepijat",
+ "Settings": "Seting",
+ "Preprocess": "Praproses",
+ "Model Name": "Nama Model",
+ "Name of the new model.": "Nama model baharu.",
+ "Enter model name": "Masukkan nama model",
+ "Dataset Path": "Laluan Set Data",
+ "Path to the dataset folder.": "Laluan ke folder set data.",
+ "Refresh Datasets": "Menyegar semula Set Data",
+ "Dataset Creator": "Pencipta Set Data",
+ "Dataset Name": "Nama Set Data",
+ "Name of the new dataset.": "Nama set data baru.",
+ "Enter dataset name": "Masukkan nama set data",
+ "Upload Audio Dataset": "Muat naik Set Data Audio",
+ "The audio file has been successfully added to the dataset. Please click the preprocess button.": "Fail audio telah berjaya ditambahkan pada set data. Sila klik butang praproses.",
+ "Enter dataset path": "Memasukkan laluan set data",
+ "Sampling Rate": "Kadar Persampelan",
+ "The sampling rate of the audio files.": "Kadar pensampelan fail audio.",
+ "Model Architecture": "Versi RVC",
+ "Version of the model architecture.": "Versi RVC model.",
+ "Preprocess Dataset": "Set Data Praproses",
+ "Extract": "Cabutan",
+ "Hop Length": "Panjang Hop",
+ "Denotes the duration it takes for the system to transition to a significant pitch change. Smaller hop lengths require more time for inference but tend to yield higher pitch accuracy.": "Menunjukkan tempoh yang diperlukan untuk sistem beralih kepada perubahan padang yang ketara. Panjang hop yang lebih kecil memerlukan lebih banyak masa untuk kesimpulan tetapi cenderung menghasilkan ketepatan padang yang lebih tinggi.",
+ "Batch Size": "Saiz kelompok",
+ "It's advisable to align it with the available VRAM of your GPU. A setting of 4 offers improved accuracy but slower processing, while 8 provides faster and standard results.": "Anda dinasihatkan untuk menyelaraskannya dengan VRAM GPU anda yang tersedia. Tetapan 4 menawarkan ketepatan yang lebih baik tetapi pemprosesan yang lebih perlahan, manakala 8 memberikan hasil yang lebih cepat dan standard.",
+ "Save Every Epoch": "Simpan Setiap Zaman",
+ "Determine at how many epochs the model will saved at.": "Tentukan berapa zaman model akan disimpan.",
+ "Total Epoch": "Jumlah Zaman",
+ "Specifies the overall quantity of epochs for the model training process.": "Menentukan kuantiti keseluruhan zaman untuk proses latihan model.",
+ "Pretrained": "Dipralatih",
+ "Save Only Latest": "Simpan Terkini Sahaja",
+ "Enabling this setting will result in the G and D files saving only their most recent versions, effectively conserving storage space.": "Mendayakan tetapan ini akan menyebabkan fail G dan D menyimpan hanya versi terbaru mereka, menjimatkan ruang storan dengan berkesan.",
+ "Save Every Weights": "Jimat Setiap Berat",
+ "This setting enables you to save the weights of the model at the conclusion of each epoch.": "Tetapan ini membolehkan anda menjimatkan berat model pada akhir setiap zaman.",
+ "Custom Pretrained": "Pralatih Tersuai",
+ "Utilizing custom pretrained models can lead to superior results, as selecting the most suitable pretrained models tailored to the specific use case can significantly enhance performance.": "Menggunakan model pra-latihan tersuai boleh membawa kepada hasil yang unggul, kerana memilih model pra-latihan yang paling sesuai yang disesuaikan dengan kes penggunaan tertentu dapat meningkatkan prestasi dengan ketara.",
+ "Upload Pretrained Model": "Muat naik Model Pralatih",
+ "Refresh Custom Pretraineds": "Menyegar Semula Pralatihan Tersuai",
+ "Pretrained Custom Settings": "Seting Tersuai Pralatih",
+ "The file you dropped is not a valid pretrained file. Please try again.": "Fail yang anda gugurkan bukan fail pralatih yang sah. Sila cuba lagi.",
+ "Click the refresh button to see the pretrained file in the dropdown menu.": "Klik butang segar semula untuk melihat fail pralatih dalam menu juntai bawah.",
+ "Pretrained G Path": "Custom Pretrained G",
+ "Pretrained D Path": "Custom Pretrained D",
+ "GPU Settings": "Seting GPU",
+ "Sets advanced GPU settings, recommended for users with better GPU architecture.": "Menetapkan tetapan GPU lanjutan, disyorkan untuk pengguna dengan seni bina GPU yang lebih baik.",
+ "GPU Custom Settings": "Seting Tersuai GPU",
+ "GPU Number": "Nombor GPU",
+ "0 to ∞ separated by -": "0 hingga ∞ dipisahkan oleh -",
+ "GPU Information": "Maklumat GPU",
+ "Pitch Guidance": "Panduan Padang",
+ "By employing pitch guidance, it becomes feasible to mirror the intonation of the original voice, including its pitch. This feature is particularly valuable for singing and other scenarios where preserving the original melody or pitch pattern is essential.": "Dengan menggunakan panduan padang, ia boleh dilaksanakan untuk mencerminkan intonasi suara asal, termasuk padangnya. Ciri ini amat berharga untuk nyanyian dan senario lain di mana mengekalkan corak melodi atau padang asal adalah penting.",
+ "Utilize pretrained models when training your own. This approach reduces training duration and enhances overall quality.": "Gunakan model yang telah dilatih semasa melatih anda sendiri. Pendekatan ini mengurangkan tempoh latihan dan meningkatkan kualiti keseluruhan.",
+ "Extract Features": "Ciri Ekstrak",
+ "Start Training": "Mulakan Latihan",
+ "Generate Index": "Menjana Indeks",
+ "Voice Model": "Model Suara",
+ "Select the voice model to use for the conversion.": "Pilih model suara untuk digunakan untuk penukaran.",
+ "Index File": "Fail Indeks",
+ "Select the index file to use for the conversion.": "Pilih fail indeks untuk digunakan untuk penukaran.",
+ "Refresh": "Refresh",
+ "Unload Voice": "Memunggah Suara",
+ "Single": "Tunggal",
+ "Upload Audio": "Muat naik Audio",
+ "Select Audio": "Pilih Audio",
+ "Select the audio to convert.": "Pilih audio untuk ditukar.",
+ "Advanced Settings": "Seting Lanjutan",
+ "Clear Outputs (Deletes all audios in assets/audios)": "Kosongkan Output (Memadamkan semua audio dalam aset/audio)",
+ "Custom Output Path": "Laluan Output Tersuai",
+ "Output Path": "Laluan Output",
+ "The path where the output audio will be saved, by default in assets/audios/output.wav": "Laluan di mana audio output akan disimpan, secara lalai dalam aset/audio/output.wav",
+ "Split Audio": "Audio Pisah",
+ "Split the audio into chunks for inference to obtain better results in some cases.": "Pisahkan audio kepada ketulan untuk kesimpulan untuk mendapatkan hasil yang lebih baik dalam beberapa kes.",
+ "Autotune": "Autotune",
+ "Apply a soft autotune to your inferences, recommended for singing conversions.": "Gunakan autotune lembut pada kesimpulan anda, disyorkan untuk penukaran nyanyian.",
+ "Clean Audio": "Audio Bersih",
+ "Clean your audio output using noise detection algorithms, recommended for speaking audios.": "Bersihkan output audio anda menggunakan algoritma pengesanan bunyi, disyorkan untuk bercakap audio.",
+ "Clean Strength": "Kekuatan Bersih",
+ "Set the clean-up level to the audio you want, the more you increase it the more it will clean up, but it is possible that the audio will be more compressed.": "Tetapkan tahap pembersihan ke audio yang anda mahukan, semakin banyak anda meningkatkannya semakin banyak ia akan membersihkan, tetapi ada kemungkinan audio akan lebih dimampatkan.",
+ "Pitch": "Pitch",
+ "Set the pitch of the audio, the higher the value, the higher the pitch.": "Tetapkan padang audio, semakin tinggi nilai, semakin tinggi padang.",
+ "Filter Radius": "Jejari Penapis",
+ "If the number is greater than or equal to three, employing median filtering on the collected tone results has the potential to decrease respiration.": "Sekiranya bilangannya lebih besar daripada atau sama dengan tiga, menggunakan penapisan median pada hasil nada yang dikumpulkan berpotensi untuk mengurangkan pernafasan.",
+ "Search Feature Ratio": "Nisbah Ciri Carian",
+ "Influence exerted by the index file; a higher value corresponds to greater influence. However, opting for lower values can help mitigate artifacts present in the audio.": "Pengaruh yang dikenakan oleh fail indeks; Nilai yang lebih tinggi sepadan dengan pengaruh yang lebih besar. Walau bagaimanapun, memilih nilai yang lebih rendah boleh membantu mengurangkan artifak yang terdapat dalam audio.",
+ "Volume Envelope": "Sampul Kelantangan",
+ "Substitute or blend with the volume envelope of the output. The closer the ratio is to 1, the more the output envelope is employed.": "Gantikan atau gabungkan dengan sampul kelantangan output. Semakin dekat nisbahnya kepada 1, semakin banyak sampul output digunakan.",
+ "Protect Voiceless Consonants": "Lindungi Konsonan Tanpa Suara",
+ "Safeguard distinct consonants and breathing sounds to prevent electro-acoustic tearing and other artifacts. Pulling the parameter to its maximum value of 0.5 offers comprehensive protection. However, reducing this value might decrease the extent of protection while potentially mitigating the indexing effect.": "Lindungi konsonan yang berbeza dan bunyi pernafasan untuk mengelakkan koyakan elektro-akustik dan artifak lain. Menarik parameter ke nilai maksimum 0.5 menawarkan perlindungan komprehensif. Walau bagaimanapun, mengurangkan nilai ini mungkin mengurangkan tahap perlindungan sambil berpotensi mengurangkan kesan pengindeksan.",
+ "Pitch extraction algorithm": "Algoritma pengekstrakan padang",
+ "Pitch extraction algorithm to use for the audio conversion. The default algorithm is rmvpe, which is recommended for most cases.": "Algoritma pengekstrakan padang untuk digunakan untuk penukaran audio. Algoritma lalai ialah rmvpe, yang disyorkan untuk kebanyakan kes.",
+ "Convert": "Menukar",
+ "Export Audio": "Eksport Audio",
+ "Batch": "Kelompok",
+ "Input Folder": "Input Folder",
+ "Select the folder containing the audios to convert.": "Pilih folder yang mengandungi audio untuk ditukar.",
+ "Enter input path": "Masukkan laluan input",
+ "Output Folder": "Output Folder",
+ "Select the folder where the output audios will be saved.": "Pilih folder di mana audio output akan disimpan.",
+ "Enter output path": "Masukkan laluan output",
+ "Get information about the audio": "Mendapatkan maklumat tentang audio",
+ "Information about the audio file": "Maklumat mengenai fail audio",
+ "Waiting for information...": "Menunggu maklumat...",
+ "## Voice Blender": "## Pengisar Suara",
+ "Select two voice models, set your desired blend percentage, and blend them into an entirely new voice.": "Pilih dua model suara, tetapkan peratusan campuran yang anda inginkan, dan gabungkannya ke dalam suara yang sama sekali baru.",
+ "Voice Blender": "Pengisar Suara",
+ "Drag and drop your model here": "Seret dan lepas model anda di sini",
+ "You can also use a custom path.": "Anda juga boleh menggunakan laluan tersuai.",
+ "Blend Ratio": "Nisbah Campuran",
+ "Adjusting the position more towards one side or the other will make the model more similar to the first or second.": "Melaraskan kedudukan lebih ke arah satu sisi atau yang lain akan menjadikan model lebih serupa dengan yang pertama atau kedua.",
+ "Fusion": "Gabungan",
+ "Path to Model": "Laluan ke Model",
+ "Enter path to model": "Masukkan laluan ke model",
+ "Model information to be placed": "Maklumat model yang akan diletakkan",
+ "Inroduce the model information": "Menghasut maklumat model",
+ "The information to be placed in the model (You can leave it blank or put anything).": "Maklumat yang akan diletakkan dalam model (Anda boleh membiarkannya kosong atau meletakkan apa-apa).",
+ "View model information": "Lihat maklumat model",
+ "Introduce the model pth path": "Perkenalkan laluan pth model",
+ "View": "Lihat",
+ "Model extraction": "Pengekstrakan model",
+ "Model conversion": "Penukaran model",
+ "Pth file": "Pth fail",
+ "Output of the pth file": "Output fail pth",
+ "# How to Report an Issue on GitHub": "# Cara Melaporkan Isu di GitHub",
+ "1. Click on the 'Record Screen' button below to start recording the issue you are experiencing.": "1. Klik pada butang 'Skrin Rekod' di bawah untuk mula merakam isu yang anda alami.",
+ "2. Once you have finished recording the issue, click on the 'Stop Recording' button (the same button, but the label changes depending on whether you are actively recording or not).": "2. Setelah anda selesai merakam isu ini, klik pada butang 'Hentikan Rakaman' (butang yang sama, tetapi label berubah bergantung pada sama ada anda merakam secara aktif atau tidak).",
+ "3. Go to [GitHub Issues](https://github.com/IAHispano/Applio/issues) and click on the 'New Issue' button.": "3. Pergi ke [Isu GitHub](https://github.com/IAHispano/Applio/issues) dan klik pada butang 'Isu Baru'.",
+ "4. Complete the provided issue template, ensuring to include details as needed, and utilize the assets section to upload the recorded file from the previous step.": "4. Lengkapkan templat isu yang disediakan, pastikan untuk memasukkan butiran mengikut keperluan, dan gunakan bahagian aset untuk memuat naik fail yang dirakam dari langkah sebelumnya.",
+ "Record Screen": "Skrin Rakam",
+ "Record": "Rekod",
+ "Stop Recording": "Hentikan Rakaman",
+ "Introduce the model .pth path": "Memperkenalkan model laluan .pth",
+ "See Model Information": "Lihat Maklumat Model",
+ "## Download Model": "## Muat Turun Model",
+ "Model Link": "Pautan Model",
+ "Introduce the model link": "Memperkenalkan pautan model",
+ "Download Model": "Model Muat Turun",
+ "## Drop files": "## Jatuhkan fail",
+ "Drag your .pth file and .index file into this space. Drag one and then the other.": "Seret fail .pth dan fail .index anda ke dalam ruang ini. Seret satu dan kemudian yang lain.",
+ "TTS Voices": "Suara TTS",
+ "Select the TTS voice to use for the conversion.": "Pilih suara TTS untuk digunakan bagi penukaran.",
+ "Text to Synthesize": "Teks untuk Mensintesis",
+ "Enter the text to synthesize.": "Masukkan teks untuk mensintesiskan.",
+ "Or you can upload a .txt file": "Atau anda boleh memuat naik fail .txt",
+ "Enter text to synthesize": "Masukkan teks untuk mensintesis saiz",
+ "Output Path for TTS Audio": "Laluan output untuk TTS Audio",
+ "Output Path for RVC Audio": "Laluan Output untuk Audio RVC",
+ "Enable Applio integration with Discord presence": "Dayakan integrasi Applio dengan kehadiran Discord",
+ "It will activate the possibility of displaying the current Applio activity in Discord.": "Ia akan mengaktifkan kemungkinan memaparkan aktiviti Applio semasa di Discord.",
+ "Enable Applio integration with applio.org/models using flask": "Dayakan integrasi Applio dengan applio.org/models menggunakan kelalang",
+ "It will activate the possibility of downloading models with a click from the website.": "Ia akan mengaktifkan kemungkinan memuat turun model dengan satu klik dari laman web.",
+ "Theme": "Tema",
+ "Select the theme you want to use. (Requires restarting Applio)": "Pilih tema yang anda mahu gunakan. (Memerlukan memulakan semula Applio)",
+ "Language": "Bahasa",
+ "Select the language you want to use. (Requires restarting Applio)": "Pilih bahasa yang anda mahu gunakan. (Memerlukan memulakan semula Applio)",
+ "Plugin Installer": "Pemasang Plugin",
+ "Drag your plugin.zip to install it": "Seret plugin.zip anda untuk memasangnya",
+ "Version Checker": "Penyemak Versi",
+ "Check which version of Applio is the latest to see if you need to update.": "Semak versi Applio yang terkini untuk melihat sama ada anda perlu mengemas kini.",
+ "Check for updates": "Semak kemas kini"
+}
\ No newline at end of file
diff --git a/assets/i18n/languages/nl_NL.json b/assets/i18n/languages/nl_NL.json
new file mode 100644
index 0000000000000000000000000000000000000000..938a719a7f913cc88345d296bf8dc4bcaf5fd41b
--- /dev/null
+++ b/assets/i18n/languages/nl_NL.json
@@ -0,0 +1,175 @@
+{
+ "VITS-based Voice Conversion focused on simplicity, quality and performance.": "Ultieme tool voor het klonen van stemmen, zorgvuldig geoptimaliseerd voor ongeëvenaarde kracht, modulariteit en gebruiksvriendelijke ervaring.",
+ "This section contains some extra utilities that often may be in experimental phases.": "Deze sectie bevat enkele extra hulpprogramma's die zich vaak in experimentele fasen bevinden.",
+ "Output Information": "Output Informatie",
+ "The output information will be displayed here.": "De uitvoerinformatie wordt hier weergegeven.",
+ "Inference": "Conclusie",
+ "Train": "Trein",
+ "Extra": "Extra",
+ "Merge Audios": "Audio samenvoegen",
+ "Processing": "Verwerking",
+ "Audio Analyzer": "Audio Analyzer",
+ "Model Information": "Modelinformatie",
+ "Plugins": "Insteekplaatsen",
+ "Download": "Downloaden",
+ "Report a Bug": "Een bug melden",
+ "Settings": "Instellingen",
+ "Preprocess": "Voorbewerking",
+ "Model Name": "Modelnaam",
+ "Name of the new model.": "Naam van het nieuwe model.",
+ "Enter model name": "Voer de modelnaam in",
+ "Dataset Path": "Pad naar gegevensset",
+ "Path to the dataset folder.": "Pad naar de map met gegevenssets.",
+ "Refresh Datasets": "Gegevenssets vernieuwen",
+ "Dataset Creator": "Maker van gegevenssets",
+ "Dataset Name": "Naam van gegevensset",
+ "Name of the new dataset.": "Naam van de nieuwe dataset.",
+ "Enter dataset name": "Voer de naam van de gegevensset in",
+ "Upload Audio Dataset": "Audiogegevensset uploaden",
+ "The audio file has been successfully added to the dataset. Please click the preprocess button.": "Het audiobestand is toegevoegd aan de dataset. Klik op de knop voorbewerking.",
+ "Enter dataset path": "Pad naar gegevensset invoeren",
+ "Sampling Rate": "Bemonsteringsfrequentie",
+ "The sampling rate of the audio files.": "De bemonsteringsfrequentie van de audiobestanden.",
+ "Model Architecture": "RVC-versie",
+ "Version of the model architecture.": "De RVC-versie van het model.",
+ "Preprocess Dataset": "Gegevensset voor het proces",
+ "Extract": "Extract",
+ "Hop Length": "Hop Lengte",
+ "Denotes the duration it takes for the system to transition to a significant pitch change. Smaller hop lengths require more time for inference but tend to yield higher pitch accuracy.": "Geeft de tijd aan die nodig is om het systeem over te zetten op een significante toonhoogteverandering. Kleinere hoplengtes hebben meer tijd nodig om conclusies te trekken, maar hebben de neiging om een hogere toonhoogtenauwkeurigheid op te leveren.",
+ "Batch Size": "Batchgrootte",
+ "It's advisable to align it with the available VRAM of your GPU. A setting of 4 offers improved accuracy but slower processing, while 8 provides faster and standard results.": "Het is aan te raden om deze af te stemmen op het beschikbare VRAM van je GPU. Een instelling van 4 biedt verbeterde nauwkeurigheid maar langzamere verwerking, terwijl 8 snellere en standaardresultaten oplevert.",
+ "Save Every Epoch": "Red elk tijdperk",
+ "Determine at how many epochs the model will saved at.": "Bepaal op hoeveel epochs het model wordt opgeslagen.",
+ "Total Epoch": "Totale tijdvak",
+ "Specifies the overall quantity of epochs for the model training process.": "Hiermee geeft u het totale aantal epochs op voor het modeltrainingsproces.",
+ "Pretrained": "Voorgetraind",
+ "Save Only Latest": "Alleen de nieuwste opslaan",
+ "Enabling this setting will result in the G and D files saving only their most recent versions, effectively conserving storage space.": "Als u deze instelling inschakelt, worden de G- en D-bestanden alleen hun meest recente versies opgeslagen, waardoor er effectief opslagruimte wordt bespaard.",
+ "Save Every Weights": "Sla alle gewichten op",
+ "This setting enables you to save the weights of the model at the conclusion of each epoch.": "Met deze instelling kunt u de gewichten van het model aan het einde van elk tijdperk opslaan.",
+ "Custom Pretrained": "Aangepaste voorgetrainde",
+ "Utilizing custom pretrained models can lead to superior results, as selecting the most suitable pretrained models tailored to the specific use case can significantly enhance performance.": "Het gebruik van op maat gemaakte voorgetrainde modellen kan tot superieure resultaten leiden, aangezien het selecteren van de meest geschikte vooraf getrainde modellen die zijn afgestemd op de specifieke gebruikssituatie de prestaties aanzienlijk kan verbeteren.",
+ "Upload Pretrained Model": "Vooraf getraind model uploaden",
+ "Refresh Custom Pretraineds": "Aangepaste vooraf getrainde bestanden vernieuwen",
+ "Pretrained Custom Settings": "Vooraf getrainde aangepaste instellingen",
+ "The file you dropped is not a valid pretrained file. Please try again.": "Het bestand dat u hebt neergezet, is geen geldig vooraf getraind bestand. Probeer het opnieuw.",
+ "Click the refresh button to see the pretrained file in the dropdown menu.": "Klik op de knop Vernieuwen om het vooraf getrainde bestand in het vervolgkeuzemenu te zien.",
+ "Pretrained G Path": "Aangepaste voorgetrainde G",
+ "Pretrained D Path": "Aangepaste voorgetrainde D",
+ "GPU Settings": "GPU-instellingen",
+ "Sets advanced GPU settings, recommended for users with better GPU architecture.": "Hiermee stelt u geavanceerde GPU-instellingen in, aanbevolen voor gebruikers met een betere GPU-architectuur.",
+ "GPU Custom Settings": "Aangepaste GPU-instellingen",
+ "GPU Number": "GPU-nummer",
+ "0 to ∞ separated by -": "0 tot ∞ gescheiden door -",
+ "GPU Information": "GPU-informatie",
+ "Pitch Guidance": "Begeleiding bij het veld",
+ "By employing pitch guidance, it becomes feasible to mirror the intonation of the original voice, including its pitch. This feature is particularly valuable for singing and other scenarios where preserving the original melody or pitch pattern is essential.": "Door gebruik te maken van toonhoogtebegeleiding wordt het mogelijk om de intonatie van de originele stem, inclusief de toonhoogte, te spiegelen. Deze functie is met name waardevol voor zang en andere scenario's waarbij het behoud van de originele melodie of het toonhoogtepatroon essentieel is.",
+ "Utilize pretrained models when training your own. This approach reduces training duration and enhances overall quality.": "Gebruik vooraf getrainde modellen bij het trainen van uw eigen modellen. Deze aanpak verkort de trainingsduur en verbetert de algehele kwaliteit.",
+ "Extract Features": "Extraheer functies",
+ "Start Training": "Begin met trainen",
+ "Generate Index": "Index genereren",
+ "Voice Model": "Stem Model",
+ "Select the voice model to use for the conversion.": "Selecteer het spraakmodel dat u voor de conversie wilt gebruiken.",
+ "Index File": "Index-bestand",
+ "Select the index file to use for the conversion.": "Selecteer het indexbestand dat u voor de conversie wilt gebruiken.",
+ "Refresh": "Opfrissen",
+ "Unload Voice": "Stem lossen",
+ "Single": "Ongetrouwd",
+ "Upload Audio": "Audio uploaden",
+ "Select Audio": "Selecteer Audio",
+ "Select the audio to convert.": "Selecteer de audio die u wilt converteren.",
+ "Advanced Settings": "Geavanceerde instellingen",
+ "Clear Outputs (Deletes all audios in assets/audios)": "Uitvoer wissen (verwijdert alle audio in assets/audio)",
+ "Custom Output Path": "Aangepast uitvoerpad",
+ "Output Path": "Uitgang Pad",
+ "The path where the output audio will be saved, by default in assets/audios/output.wav": "Het pad waar de uitvoeraudio wordt opgeslagen, standaard in assets/audios/output.wav",
+ "Split Audio": "Audio splitsen",
+ "Split the audio into chunks for inference to obtain better results in some cases.": "Splits de audio op in stukken voor gevolgtrekking om in sommige gevallen betere resultaten te verkrijgen.",
+ "Autotune": "Automatisch afstemmen",
+ "Apply a soft autotune to your inferences, recommended for singing conversions.": "Pas een zachte autotune toe op je inferenties, aanbevolen voor zangconversies.",
+ "Clean Audio": "Schone audio",
+ "Clean your audio output using noise detection algorithms, recommended for speaking audios.": "Reinig uw audio-uitvoer met behulp van ruisdetectie-algoritmen, aanbevolen voor gesproken audio.",
+ "Clean Strength": "Schone kracht",
+ "Set the clean-up level to the audio you want, the more you increase it the more it will clean up, but it is possible that the audio will be more compressed.": "Stel het opschoningsniveau in op de gewenste audio, hoe meer u het verhoogt, hoe meer het zal opschonen, maar het is mogelijk dat de audio meer gecomprimeerd zal zijn.",
+ "Pitch": "Toonhoogte",
+ "Set the pitch of the audio, the higher the value, the higher the pitch.": "Stel de toonhoogte van de audio in, hoe hoger de waarde, hoe hoger de toonhoogte.",
+ "Filter Radius": "Filter Straal",
+ "If the number is greater than or equal to three, employing median filtering on the collected tone results has the potential to decrease respiration.": "Als het getal groter is dan of gelijk is aan drie, kan het gebruik van mediane filtering op de verzamelde toonresultaten de ademhaling verminderen.",
+ "Search Feature Ratio": "Verhouding zoekfunctie",
+ "Influence exerted by the index file; a higher value corresponds to greater influence. However, opting for lower values can help mitigate artifacts present in the audio.": "Invloed uitgeoefend door het indexbestand; Een hogere waarde komt overeen met een grotere invloed. Als u echter voor lagere waarden kiest, kunt u de artefacten in de audio verminderen.",
+ "Volume Envelope": "Volume Envelop",
+ "Substitute or blend with the volume envelope of the output. The closer the ratio is to 1, the more the output envelope is employed.": "Vervang of meng met de volume-envelop van de uitvoer. Hoe dichter de verhouding bij 1 ligt, hoe meer de uitvoerenveloppe wordt gebruikt.",
+ "Protect Voiceless Consonants": "Bescherm stemloze medeklinkers",
+ "Safeguard distinct consonants and breathing sounds to prevent electro-acoustic tearing and other artifacts. Pulling the parameter to its maximum value of 0.5 offers comprehensive protection. However, reducing this value might decrease the extent of protection while potentially mitigating the indexing effect.": "Bescherm verschillende medeklinkers en ademhalingsgeluiden om elektro-akoestische scheuren en andere artefacten te voorkomen. Door de parameter naar de maximale waarde van 0,5 te trekken, wordt uitgebreide bescherming geboden. Het verlagen van deze waarde kan echter de mate van bescherming verminderen en mogelijk het indexeringseffect beperken.",
+ "Pitch extraction algorithm": "Algoritme voor het extraheren van toonhoogte",
+ "Pitch extraction algorithm to use for the audio conversion. The default algorithm is rmvpe, which is recommended for most cases.": "Pitch-extractie-algoritme om te gebruiken voor de audioconversie. Het standaardalgoritme is rmvpe, wat in de meeste gevallen wordt aanbevolen.",
+ "Convert": "Omzetten",
+ "Export Audio": "Audio exporteren",
+ "Batch": "Batch",
+ "Input Folder": "Invoermap",
+ "Select the folder containing the audios to convert.": "Selecteer de map met de audio die u wilt converteren.",
+ "Enter input path": "Voer het invoerpad in",
+ "Output Folder": "Uitvoer map",
+ "Select the folder where the output audios will be saved.": "Selecteer de map waar de uitvoeraudio wordt opgeslagen.",
+ "Enter output path": "Voer het uitvoerpad in",
+ "Get information about the audio": "Informatie over de audio opvragen",
+ "Information about the audio file": "Informatie over het audiobestand",
+ "Waiting for information...": "Wachten op informatie...",
+ "## Voice Blender": "## Stem Blender",
+ "Select two voice models, set your desired blend percentage, and blend them into an entirely new voice.": "Selecteer twee stemmodellen, stel het gewenste mengpercentage in en meng ze tot een geheel nieuwe stem.",
+ "Voice Blender": "Stem Blender",
+ "Drag and drop your model here": "Sleep uw model hierheen",
+ "You can also use a custom path.": "U kunt ook een aangepast pad gebruiken.",
+ "Blend Ratio": "Mengverhouding",
+ "Adjusting the position more towards one side or the other will make the model more similar to the first or second.": "Door de positie meer naar de ene of de andere kant aan te passen, lijkt het model meer op het eerste of tweede.",
+ "Fusion": "Samensmelting",
+ "Path to Model": "Pad naar model",
+ "Enter path to model": "Pad naar model invoeren",
+ "Model information to be placed": "Te plaatsen modelinformatie",
+ "Inroduce the model information": "Produceer de modelinformatie",
+ "The information to be placed in the model (You can leave it blank or put anything).": "De informatie die in het model moet worden geplaatst (u kunt het leeg laten of alles plaatsen).",
+ "View model information": "Modelinformatie weergeven",
+ "Introduce the model pth path": "Introduceer het model pth pad",
+ "View": "Bekijken",
+ "Model extraction": "Extractie van modellen",
+ "Model conversion": "Model conversie",
+ "Pth file": "Pth-bestand",
+ "Output of the pth file": "Uitvoer van het pth-bestand",
+ "# How to Report an Issue on GitHub": "# Een probleem melden op GitHub",
+ "1. Click on the 'Record Screen' button below to start recording the issue you are experiencing.": "1. Klik op de knop 'Opnamescherm' hieronder om te beginnen met het opnemen van het probleem dat u ondervindt.",
+ "2. Once you have finished recording the issue, click on the 'Stop Recording' button (the same button, but the label changes depending on whether you are actively recording or not).": "2. Als u klaar bent met het opnemen van het probleem, klikt u op de knop 'Opname stoppen' (dezelfde knop, maar het label verandert afhankelijk van of u actief aan het opnemen bent of niet).",
+ "3. Go to [GitHub Issues](https://github.com/IAHispano/Applio/issues) and click on the 'New Issue' button.": "3. Ga naar [GitHub Issues](https://github.com/IAHispano/Applio/issues) en klik op de knop 'New Issue'.",
+ "4. Complete the provided issue template, ensuring to include details as needed, and utilize the assets section to upload the recorded file from the previous step.": "4. Vul het meegeleverde uitgiftesjabloon in, zorg ervoor dat u indien nodig details opneemt en gebruik het gedeelte Activa om het opgenomen bestand uit de vorige stap te uploaden.",
+ "Record Screen": "Scherm opnemen",
+ "Record": "Record",
+ "Stop Recording": "Opname stoppen",
+ "Introduce the model .pth path": "Introduceer het model .pth-pad",
+ "See Model Information": "Modelinformatie bekijken",
+ "## Download Model": "## Model downloaden",
+ "Model Link": "Koppeling naar het model",
+ "Introduce the model link": "Introduceer de modellink",
+ "Download Model": "Model downloaden",
+ "## Drop files": "## Bestanden neerzetten",
+ "Drag your .pth file and .index file into this space. Drag one and then the other.": "Sleep uw .pth-bestand en .index-bestand naar deze ruimte. Sleep de ene en dan de andere.",
+ "TTS Voices": "TTS-stemmen",
+ "Select the TTS voice to use for the conversion.": "Selecteer de TTS-stem die u voor de conversie wilt gebruiken.",
+ "Text to Synthesize": "Tekst om te synthetiseren",
+ "Enter the text to synthesize.": "Voer de tekst in die u wilt synthetiseren.",
+ "Or you can upload a .txt file": "Of u kunt een .txt bestand uploaden",
+ "Enter text to synthesize": "Voer tekst in om te synthetiseren",
+ "Output Path for TTS Audio": "Uitvoerpad voor TTS-audio",
+ "Output Path for RVC Audio": "Uitvoerpad voor RVC-audio",
+ "Enable Applio integration with Discord presence": "Applio-integratie inschakelen met Discord-aanwezigheid",
+ "It will activate the possibility of displaying the current Applio activity in Discord.": "Het activeert de mogelijkheid om de huidige Applio-activiteit in Discord weer te geven.",
+ "Enable Applio integration with applio.org/models using flask": "Applio-integratie met applio.org/models inschakelen met behulp van kolf",
+ "It will activate the possibility of downloading models with a click from the website.": "Het activeert de mogelijkheid om modellen te downloaden met een klik van de website.",
+ "Theme": "Thema",
+ "Select the theme you want to use. (Requires restarting Applio)": "Selecteer het thema dat je wilt gebruiken. (Vereist het opnieuw opstarten van Applio)",
+ "Language": "Taal",
+ "Select the language you want to use. (Requires restarting Applio)": "Selecteer de taal die u wilt gebruiken. (Vereist het opnieuw opstarten van Applio)",
+ "Plugin Installer": "Plug-in-installatieprogramma",
+ "Drag your plugin.zip to install it": "Sleep uw plugin.zip om deze te installeren",
+ "Version Checker": "Versie Checker",
+ "Check which version of Applio is the latest to see if you need to update.": "Controleer welke versie van Applio de nieuwste is om te zien of u moet updaten.",
+ "Check for updates": "Controleren op updates"
+}
\ No newline at end of file
diff --git a/assets/i18n/languages/pa_PA.json b/assets/i18n/languages/pa_PA.json
new file mode 100644
index 0000000000000000000000000000000000000000..21199db597a7785acf08968714cb4d5808844a8c
--- /dev/null
+++ b/assets/i18n/languages/pa_PA.json
@@ -0,0 +1,175 @@
+{
+ "VITS-based Voice Conversion focused on simplicity, quality and performance.": "ਅੰਤਮ ਵੌਇਸ ਕਲੋਨਿੰਗ ਟੂਲ, ਬੇਮਿਸਾਲ ਸ਼ਕਤੀ, ਮਾਡਿਊਲਰਿਟੀ, ਅਤੇ ਉਪਭੋਗਤਾ-ਅਨੁਕੂਲ ਅਨੁਭਵ ਲਈ ਧਿਆਨ ਨਾਲ ਅਨੁਕੂਲ ਬਣਾਇਆ ਗਿਆ ਹੈ.",
+ "This section contains some extra utilities that often may be in experimental phases.": "ਇਸ ਭਾਗ ਵਿੱਚ ਕੁਝ ਵਾਧੂ ਉਪਯੋਗਤਾਵਾਂ ਹਨ ਜੋ ਅਕਸਰ ਪ੍ਰਯੋਗਾਤਮਕ ਪੜਾਵਾਂ ਵਿੱਚ ਹੋ ਸਕਦੀਆਂ ਹਨ।",
+ "Output Information": "ਆਊਟਪੁੱਟ ਜਾਣਕਾਰੀ",
+ "The output information will be displayed here.": "ਆਉਟਪੁੱਟ ਜਾਣਕਾਰੀ ਇੱਥੇ ਪ੍ਰਦਰਸ਼ਿਤ ਕੀਤੀ ਜਾਵੇਗੀ।",
+ "Inference": "ਅਨੁਮਾਨ",
+ "Train": "ਟ੍ਰੇਨ",
+ "Extra": "ਵਾਧੂ",
+ "Merge Audios": "ਆਡੀਓ ਨੂੰ ਮਿਲਾਓ",
+ "Processing": "ਪ੍ਰੋਸੈਸਿੰਗ",
+ "Audio Analyzer": "ਆਡੀਓ ਵਿਸ਼ਲੇਸ਼ਕ",
+ "Model Information": "ਮਾਡਲ ਜਾਣਕਾਰੀ",
+ "Plugins": "ਪਲੱਗਇਨ",
+ "Download": "ਡਾਊਨਲੋਡ ਕਰੋ",
+ "Report a Bug": "ਇੱਕ ਬਗ ਦੀ ਰਿਪੋਰਟ ਕਰੋ",
+ "Settings": "ਸੈਟਿੰਗਾਂ",
+ "Preprocess": "ਪ੍ਰੀਪ੍ਰੋਸੈਸ",
+ "Model Name": "ਮਾਡਲ ਦਾ ਨਾਮ",
+ "Name of the new model.": "ਨਵੇਂ ਮਾਡਲ ਦਾ ਨਾਮ।",
+ "Enter model name": "ਮਾਡਲ ਨਾਮ ਦਾਖਲ ਕਰੋ",
+ "Dataset Path": "ਡਾਟਾਸੈਟ ਪਾਥ",
+ "Path to the dataset folder.": "ਡੇਟਾਸੈਟ ਫੋਲਡਰ ਲਈ ਰਾਹ।",
+ "Refresh Datasets": "ਡੇਟਾਸੈਟਾਂ ਨੂੰ ਤਾਜ਼ਾ ਕਰੋ",
+ "Dataset Creator": "ਡਾਟਾਸੈਟ ਨਿਰਮਾਤਾ",
+ "Dataset Name": "ਡੇਟਾਸੈਟ ਨਾਮ",
+ "Name of the new dataset.": "ਨਵੇਂ ਡੇਟਾਸੈਟ ਦਾ ਨਾਮ।",
+ "Enter dataset name": "ਡੇਟਾਸੈਟ ਦਾ ਨਾਮ ਦਾਖਲ ਕਰੋ",
+ "Upload Audio Dataset": "ਆਡੀਓ ਡੇਟਾਸੈਟ ਅੱਪਲੋਡ ਕਰੋ",
+ "The audio file has been successfully added to the dataset. Please click the preprocess button.": "ਆਡੀਓ ਫਾਇਲ ਨੂੰ ਡੇਟਾਸੈਟ ਵਿੱਚ ਸਫਲਤਾਪੂਰਵਕ ਜੋੜਿਆ ਗਿਆ ਹੈ। ਕਿਰਪਾ ਕਰਕੇ ਪ੍ਰੀਪ੍ਰੋਸੈਸ ਬਟਨ 'ਤੇ ਕਲਿੱਕ ਕਰੋ।",
+ "Enter dataset path": "ਡਾਟਾਸੈਟ ਪਾਥ ਦਾਖਲ ਕਰੋ",
+ "Sampling Rate": "ਨਮੂਨੇ ਲੈਣ ਦੀ ਦਰ",
+ "The sampling rate of the audio files.": "ਆਡੀਓ ਫਾਇਲਾਂ ਦੀ ਨਮੂਨੇ ਲੈਣ ਦੀ ਦਰ।",
+ "Model Architecture": "RVC ਸੰਸਕਰਣ",
+ "Version of the model architecture.": "ਮਾਡਲ ਦਾ ਆਰਵੀਸੀ ਸੰਸਕਰਣ.",
+ "Preprocess Dataset": "ਪ੍ਰੀਪ੍ਰੋਸੈਸ ਡੇਟਾਸੈਟ",
+ "Extract": "ਐਕਸਟਰੈਕਟ",
+ "Hop Length": "ਹੌਪ ਲੰਬਾਈ",
+ "Denotes the duration it takes for the system to transition to a significant pitch change. Smaller hop lengths require more time for inference but tend to yield higher pitch accuracy.": "ਸਿਸਟਮ ਨੂੰ ਮਹੱਤਵਪੂਰਣ ਪਿਚ ਤਬਦੀਲੀ ਵਿੱਚ ਤਬਦੀਲ ਹੋਣ ਵਿੱਚ ਲੱਗਣ ਵਾਲੇ ਸਮੇਂ ਨੂੰ ਦਰਸਾਉਂਦਾ ਹੈ। ਛੋਟੀਆਂ ਹੌਪ ਲੰਬਾਈਆਂ ਨੂੰ ਅਨੁਮਾਨ ਲਗਾਉਣ ਲਈ ਵਧੇਰੇ ਸਮੇਂ ਦੀ ਲੋੜ ਹੁੰਦੀ ਹੈ ਪਰ ਪਿੱਚ ਦੀ ਸਟੀਕਤਾ ਵਧੇਰੇ ਹੁੰਦੀ ਹੈ।",
+ "Batch Size": "ਬੈਚ ਦਾ ਆਕਾਰ",
+ "It's advisable to align it with the available VRAM of your GPU. A setting of 4 offers improved accuracy but slower processing, while 8 provides faster and standard results.": "ਇਸ ਨੂੰ ਆਪਣੇ GPU ਦੇ ਉਪਲਬਧ VRAM ਨਾਲ ਜੋੜਨ ਦੀ ਸਲਾਹ ਦਿੱਤੀ ਜਾਂਦੀ ਹੈ। 4 ਦੀ ਸੈਟਿੰਗ ਬਿਹਤਰ ਸ਼ੁੱਧਤਾ ਪਰ ਹੌਲੀ ਪ੍ਰੋਸੈਸਿੰਗ ਦੀ ਪੇਸ਼ਕਸ਼ ਕਰਦੀ ਹੈ, ਜਦੋਂ ਕਿ 8 ਤੇਜ਼ ਅਤੇ ਮਿਆਰੀ ਨਤੀਜੇ ਪ੍ਰਦਾਨ ਕਰਦੀ ਹੈ.",
+ "Save Every Epoch": "ਹਰ ਯੁੱਗ ਨੂੰ ਸੁਰੱਖਿਅਤ ਕਰੋ",
+ "Determine at how many epochs the model will saved at.": "ਇਹ ਨਿਰਧਾਰਤ ਕਰੋ ਕਿ ਮਾਡਲ ਕਿੰਨੇ ਯੁੱਗਾਂ ਵਿੱਚ ਸੁਰੱਖਿਅਤ ਹੋਵੇਗਾ।",
+ "Total Epoch": "ਕੁੱਲ ਯੁੱਗ",
+ "Specifies the overall quantity of epochs for the model training process.": "ਮਾਡਲ ਸਿਖਲਾਈ ਪ੍ਰਕਿਰਿਆ ਲਈ ਯੁੱਗਾਂ ਦੀ ਸਮੁੱਚੀ ਮਾਤਰਾ ਨਿਰਧਾਰਤ ਕਰਦਾ ਹੈ.",
+ "Pretrained": "ਪਹਿਲਾਂ ਤੋਂ ਸਿਖਲਾਈ ਪ੍ਰਾਪਤ",
+ "Save Only Latest": "ਕੇਵਲ ਨਵੀਨਤਮ ਨੂੰ ਸੁਰੱਖਿਅਤ ਕਰੋ",
+ "Enabling this setting will result in the G and D files saving only their most recent versions, effectively conserving storage space.": "ਇਸ ਸੈਟਿੰਗ ਨੂੰ ਸਮਰੱਥ ਕਰਨ ਦੇ ਨਤੀਜੇ ਵਜੋਂ ਜੀ ਅਤੇ ਡੀ ਫਾਈਲਾਂ ਸਿਰਫ ਆਪਣੇ ਸਭ ਤੋਂ ਤਾਜ਼ਾ ਸੰਸਕਰਣਾਂ ਨੂੰ ਸੁਰੱਖਿਅਤ ਕਰਨਗੀਆਂ, ਸਟੋਰੇਜ ਸਪੇਸ ਨੂੰ ਪ੍ਰਭਾਵਸ਼ਾਲੀ ਢੰਗ ਨਾਲ ਸੁਰੱਖਿਅਤ ਕਰਨਗੀਆਂ.",
+ "Save Every Weights": "ਹਰ ਭਾਰ ਨੂੰ ਬਚਾਓ",
+ "This setting enables you to save the weights of the model at the conclusion of each epoch.": "ਇਹ ਸੈਟਿੰਗ ਤੁਹਾਨੂੰ ਹਰੇਕ ਯੁੱਗ ਦੇ ਅੰਤ 'ਤੇ ਮਾਡਲ ਦੇ ਭਾਰ ਨੂੰ ਬਚਾਉਣ ਦੇ ਯੋਗ ਬਣਾਉਂਦੀ ਹੈ।",
+ "Custom Pretrained": "ਕਸਟਮ ਪ੍ਰੀਟ੍ਰੇਨਡ",
+ "Utilizing custom pretrained models can lead to superior results, as selecting the most suitable pretrained models tailored to the specific use case can significantly enhance performance.": "ਕਸਟਮ ਪ੍ਰੀਟ੍ਰੇਨਡ ਮਾਡਲਾਂ ਦੀ ਵਰਤੋਂ ਕਰਨ ਨਾਲ ਵਧੀਆ ਨਤੀਜੇ ਨਿਕਲ ਸਕਦੇ ਹਨ, ਕਿਉਂਕਿ ਵਿਸ਼ੇਸ਼ ਵਰਤੋਂ ਦੇ ਕੇਸ ਦੇ ਅਨੁਸਾਰ ਸਭ ਤੋਂ ਢੁਕਵੇਂ ਪੂਰਵ-ਸਿਖਲਾਈ ਪ੍ਰਾਪਤ ਮਾਡਲਾਂ ਦੀ ਚੋਣ ਕਰਨ ਨਾਲ ਪ੍ਰਦਰਸ਼ਨ ਵਿੱਚ ਮਹੱਤਵਪੂਰਣ ਵਾਧਾ ਹੋ ਸਕਦਾ ਹੈ.",
+ "Upload Pretrained Model": "ਪਹਿਲਾਂ ਤੋਂ ਸਿਖਲਾਈ ਪ੍ਰਾਪਤ ਮਾਡਲ ਅਪਲੋਡ ਕਰੋ",
+ "Refresh Custom Pretraineds": "ਕਸਟਮ ਪ੍ਰੀਟ੍ਰੇਨਡਾਂ ਨੂੰ ਤਾਜ਼ਾ ਕਰੋ",
+ "Pretrained Custom Settings": "ਪਹਿਲਾਂ ਤੋਂ ਸਿਖਲਾਈ ਪ੍ਰਾਪਤ ਕਸਟਮ ਸੈਟਿੰਗਾਂ",
+ "The file you dropped is not a valid pretrained file. Please try again.": "ਤੁਹਾਡੇ ਵੱਲੋਂ ਛੱਡੀ ਗਈ ਫਾਇਲ ਇੱਕ ਵੈਧ ਪੂਰਵ-ਸਿਖਲਾਈ ਪ੍ਰਾਪਤ ਫਾਇਲ ਨਹੀਂ ਹੈ। ਕਿਰਪਾ ਕਰਕੇ ਦੁਬਾਰਾ ਕੋਸ਼ਿਸ਼ ਕਰੋ।",
+ "Click the refresh button to see the pretrained file in the dropdown menu.": "ਡ੍ਰੌਪਡਾਊਨ ਮੀਨੂ ਵਿੱਚ ਪਹਿਲਾਂ ਤੋਂ ਸਿਖਲਾਈ ਪ੍ਰਾਪਤ ਫਾਇਲ ਨੂੰ ਦੇਖਣ ਲਈ ਰੀਫਰੈਸ਼ ਬਟਨ 'ਤੇ ਕਲਿੱਕ ਕਰੋ।",
+ "Pretrained G Path": "ਕਸਟਮ ਪ੍ਰੀਟ੍ਰੇਨਡ G",
+ "Pretrained D Path": "ਕਸਟਮ ਪ੍ਰੀਟ੍ਰੇਨਡ ਡੀ",
+ "GPU Settings": "GPU ਸੈਟਿੰਗਾਂ",
+ "Sets advanced GPU settings, recommended for users with better GPU architecture.": "ਉੱਨਤ GPU ਸੈਟਿੰਗਾਂ ਸੈੱਟ ਕਰਦਾ ਹੈ, ਜੋ ਬਿਹਤਰ GPU ਆਰਕੀਟੈਕਚਰ ਵਾਲੇ ਉਪਭੋਗਤਾਵਾਂ ਲਈ ਸਿਫਾਰਸ਼ ਕੀਤੀ ਜਾਂਦੀ ਹੈ।",
+ "GPU Custom Settings": "GPU ਕਸਟਮ ਸੈਟਿੰਗਾਂ",
+ "GPU Number": "GPU ਨੰਬਰ",
+ "0 to ∞ separated by -": "0 ਤੋਂ ∞ ਦੁਆਰਾ ਵੱਖ ਕੀਤਾ ਜਾਂਦਾ ਹੈ -",
+ "GPU Information": "GPU ਜਾਣਕਾਰੀ",
+ "Pitch Guidance": "ਪਿਚ ਗਾਈਡੈਂਸ",
+ "By employing pitch guidance, it becomes feasible to mirror the intonation of the original voice, including its pitch. This feature is particularly valuable for singing and other scenarios where preserving the original melody or pitch pattern is essential.": "ਪਿਚ ਗਾਈਡੈਂਸ ਦੀ ਵਰਤੋਂ ਕਰਕੇ, ਅਸਲ ਆਵਾਜ਼ ਦੇ ਇੰਟਰਨੇਸ਼ਨ ਨੂੰ ਦਰਸਾਉਣਾ ਸੰਭਵ ਹੋ ਜਾਂਦਾ ਹੈ, ਜਿਸ ਵਿੱਚ ਇਸਦੀ ਪਿੱਚ ਵੀ ਸ਼ਾਮਲ ਹੈ. ਇਹ ਵਿਸ਼ੇਸ਼ਤਾ ਗਾਇਕੀ ਅਤੇ ਹੋਰ ਦ੍ਰਿਸ਼ਾਂ ਲਈ ਵਿਸ਼ੇਸ਼ ਤੌਰ 'ਤੇ ਕੀਮਤੀ ਹੈ ਜਿੱਥੇ ਮੂਲ ਸੁਰ ਜਾਂ ਪਿੱਚ ਪੈਟਰਨ ਨੂੰ ਸੁਰੱਖਿਅਤ ਰੱਖਣਾ ਜ਼ਰੂਰੀ ਹੈ।",
+ "Utilize pretrained models when training your own. This approach reduces training duration and enhances overall quality.": "ਆਪਣੇ ਆਪ ਨੂੰ ਸਿਖਲਾਈ ਦਿੰਦੇ ਸਮੇਂ ਪਹਿਲਾਂ ਤੋਂ ਸਿਖਲਾਈ ਪ੍ਰਾਪਤ ਮਾਡਲਾਂ ਦੀ ਵਰਤੋਂ ਕਰੋ। ਇਹ ਪਹੁੰਚ ਸਿਖਲਾਈ ਦੀ ਮਿਆਦ ਨੂੰ ਘਟਾਉਂਦੀ ਹੈ ਅਤੇ ਸਮੁੱਚੀ ਗੁਣਵੱਤਾ ਨੂੰ ਵਧਾਉਂਦੀ ਹੈ।",
+ "Extract Features": "ਐਕਸਟਰੈਕਟ ਵਿਸ਼ੇਸ਼ਤਾਵਾਂ",
+ "Start Training": "ਸਿਖਲਾਈ ਸ਼ੁਰੂ ਕਰੋ",
+ "Generate Index": "ਇੰਡੈਕਸ ਜਨਰੇਟ ਕਰੋ",
+ "Voice Model": "ਵੌਇਸ ਮਾਡਲ",
+ "Select the voice model to use for the conversion.": "ਪਰਿਵਰਤਨ ਲਈ ਵਰਤਣ ਲਈ ਵੌਇਸ ਮਾਡਲ ਦੀ ਚੋਣ ਕਰੋ।",
+ "Index File": "ਇੰਡੈਕਸ ਫਾਇਲ",
+ "Select the index file to use for the conversion.": "ਪਰਿਵਰਤਨ ਲਈ ਵਰਤਣ ਲਈ ਇੰਡੈਕਸ ਫਾਇਲ ਦੀ ਚੋਣ ਕਰੋ।",
+ "Refresh": "ਤਾਜ਼ਾ ਕਰੋ",
+ "Unload Voice": "ਆਵਾਜ਼ ਨੂੰ ਅਨਲੋਡ ਕਰੋ",
+ "Single": "ਸਿੰਗਲ",
+ "Upload Audio": "ਆਡੀਓ ਅੱਪਲੋਡ ਕਰੋ",
+ "Select Audio": "ਆਡੀਓ ਚੁਣੋ",
+ "Select the audio to convert.": "ਕਨਵਰਟ ਕਰਨ ਲਈ ਆਡੀਓ ਦੀ ਚੋਣ ਕਰੋ।",
+ "Advanced Settings": "ਉੱਨਤ ਸੈਟਿੰਗਾਂ",
+ "Clear Outputs (Deletes all audios in assets/audios)": "ਸਪਸ਼ਟ ਆਊਟਪੁੱਟ (ਜਾਇਦਾਦਾਂ/ਆਡੀਓ ਵਿੱਚ ਸਾਰੇ ਆਡੀਓ ਮਿਟਾ ਦਿੰਦਾ ਹੈ)",
+ "Custom Output Path": "ਕਸਟਮ ਆਉਟਪੁੱਟ ਪਾਥ",
+ "Output Path": "ਆਊਟਪੁੱਟ ਪਾਥ",
+ "The path where the output audio will be saved, by default in assets/audios/output.wav": "ਉਹ ਰਸਤਾ ਜਿੱਥੇ ਆਉਟਪੁੱਟ ਆਡੀਓ ਨੂੰ ਸੁਰੱਖਿਅਤ ਕੀਤਾ ਜਾਵੇਗਾ, ਸੰਪਤੀਆਂ/ਆਡੀਓਜ਼/output.wav",
+ "Split Audio": "ਸਪਲਿਟ ਆਡੀਓ",
+ "Split the audio into chunks for inference to obtain better results in some cases.": "ਕੁਝ ਮਾਮਲਿਆਂ ਵਿੱਚ ਬਿਹਤਰ ਨਤੀਜੇ ਪ੍ਰਾਪਤ ਕਰਨ ਲਈ ਅਨੁਮਾਨ ਲਈ ਆਡੀਓ ਨੂੰ ਟੁਕੜਿਆਂ ਵਿੱਚ ਵੰਡੋ।",
+ "Autotune": "Autotune",
+ "Apply a soft autotune to your inferences, recommended for singing conversions.": "ਆਪਣੇ ਅਨੁਮਾਨਾਂ ਲਈ ਇੱਕ ਨਰਮ ਆਟੋਟਿਊਨ ਲਾਗੂ ਕਰੋ, ਜਿਸ ਦੀ ਸਿਫਾਰਸ਼ ਗਾਇਨ ਪਰਿਵਰਤਨ ਲਈ ਕੀਤੀ ਜਾਂਦੀ ਹੈ।",
+ "Clean Audio": "ਕਲੀਨ ਆਡੀਓ",
+ "Clean your audio output using noise detection algorithms, recommended for speaking audios.": "ਆਡੀਓ ਬੋਲਣ ਲਈ ਸਿਫਾਰਸ਼ ਕੀਤੇ ਗਏ ਸ਼ੋਰ ਦਾ ਪਤਾ ਲਗਾਉਣ ਵਾਲੇ ਐਲਗੋਰਿਦਮ ਦੀ ਵਰਤੋਂ ਕਰਕੇ ਆਪਣੇ ਆਡੀਓ ਆਉਟਪੁੱਟ ਨੂੰ ਸਾਫ਼ ਕਰੋ।",
+ "Clean Strength": "ਸਾਫ਼ ਤਾਕਤ",
+ "Set the clean-up level to the audio you want, the more you increase it the more it will clean up, but it is possible that the audio will be more compressed.": "ਜਿਸ ਆਡੀਓ ਨੂੰ ਤੁਸੀਂ ਚਾਹੁੰਦੇ ਹੋ ਉਸ 'ਤੇ ਕਲੀਨ-ਅੱਪ ਪੱਧਰ ਸੈੱਟ ਕਰੋ, ਜਿੰਨਾ ਜ਼ਿਆਦਾ ਤੁਸੀਂ ਇਸ ਨੂੰ ਵਧਾਓਗੇ, ਓਨਾ ਹੀ ਇਹ ਸਾਫ਼ ਹੋ ਜਾਵੇਗਾ, ਪਰ ਇਹ ਸੰਭਵ ਹੈ ਕਿ ਆਡੀਓ ਵਧੇਰੇ ਸੰਕੁਚਿਤ ਹੋ ਜਾਵੇਗਾ.",
+ "Pitch": "ਪਿਚ",
+ "Set the pitch of the audio, the higher the value, the higher the pitch.": "ਆਡੀਓ ਦੀ ਪਿਚ ਸੈੱਟ ਕਰੋ, ਜਿੰਨਾ ਜ਼ਿਆਦਾ ਮੁੱਲ ਹੋਵੇਗਾ, ਪਿਚ ਓਨੀ ਹੀ ਉੱਚੀ ਹੋਵੇਗੀ।",
+ "Filter Radius": "ਫਿਲਟਰ ਰੇਡੀਅਸ",
+ "If the number is greater than or equal to three, employing median filtering on the collected tone results has the potential to decrease respiration.": "ਜੇ ਗਿਣਤੀ ਤਿੰਨ ਤੋਂ ਵੱਧ ਜਾਂ ਬਰਾਬਰ ਹੈ, ਤਾਂ ਇਕੱਤਰ ਕੀਤੇ ਟੋਨ ਨਤੀਜਿਆਂ 'ਤੇ ਮੀਡੀਅਨ ਫਿਲਟਰਿੰਗ ਲਗਾਉਣ ਨਾਲ ਸਾਹ ਲੈਣ ਵਿੱਚ ਕਮੀ ਆਉਣ ਦੀ ਸੰਭਾਵਨਾ ਹੁੰਦੀ ਹੈ.",
+ "Search Feature Ratio": "ਖੋਜ ਵਿਸ਼ੇਸ਼ਤਾ ਅਨੁਪਾਤ",
+ "Influence exerted by the index file; a higher value corresponds to greater influence. However, opting for lower values can help mitigate artifacts present in the audio.": "ਇੰਡੈਕਸ ਫਾਈਲ ਦੁਆਰਾ ਪਾਇਆ ਗਿਆ ਪ੍ਰਭਾਵ; ਇੱਕ ਉੱਚ ਮੁੱਲ ਵਧੇਰੇ ਪ੍ਰਭਾਵ ਨਾਲ ਮੇਲ ਖਾਂਦਾ ਹੈ. ਹਾਲਾਂਕਿ, ਘੱਟ ਮੁੱਲਾਂ ਦੀ ਚੋਣ ਕਰਨਾ ਆਡੀਓ ਵਿੱਚ ਮੌਜੂਦ ਕਲਾਕ੍ਰਿਤੀਆਂ ਨੂੰ ਘਟਾਉਣ ਵਿੱਚ ਮਦਦ ਕਰ ਸਕਦਾ ਹੈ।",
+ "Volume Envelope": "ਵਾਲਿਊਮ ਲਿਫਾਫਾ",
+ "Substitute or blend with the volume envelope of the output. The closer the ratio is to 1, the more the output envelope is employed.": "ਆਉਟਪੁੱਟ ਦੇ ਵਾਲੀਅਮ ਲਿਫਾਫੇ ਨੂੰ ਬਦਲੋ ਜਾਂ ਮਿਸ਼ਰਣ ਕਰੋ। ਅਨੁਪਾਤ ਜਿੰਨਾ 1 ਦੇ ਨੇੜੇ ਹੁੰਦਾ ਹੈ, ਓਨਾ ਹੀ ਆਉਟਪੁੱਟ ਲਿਫਾਫਾ ਵਰਤਿਆ ਜਾਂਦਾ ਹੈ.",
+ "Protect Voiceless Consonants": "ਅਵਾਜ਼ ਰਹਿਤ ਵਿੰਜਨਾਂ ਦੀ ਰੱਖਿਆ ਕਰੋ",
+ "Safeguard distinct consonants and breathing sounds to prevent electro-acoustic tearing and other artifacts. Pulling the parameter to its maximum value of 0.5 offers comprehensive protection. However, reducing this value might decrease the extent of protection while potentially mitigating the indexing effect.": "ਇਲੈਕਟ੍ਰੋ-ਐਕੋਸਟਿਕ ਫਾੜਨ ਅਤੇ ਹੋਰ ਕਲਾਕ੍ਰਿਤੀਆਂ ਨੂੰ ਰੋਕਣ ਲਈ ਵੱਖਰੇ ਵਿਜਨਾਂ ਅਤੇ ਸਾਹ ਲੈਣ ਦੀਆਂ ਆਵਾਜ਼ਾਂ ਦੀ ਰੱਖਿਆ ਕਰੋ। ਪੈਰਾਮੀਟਰ ਨੂੰ ਇਸਦੇ ਵੱਧ ਤੋਂ ਵੱਧ ਮੁੱਲ 0.5 ਤੱਕ ਖਿੱਚਣਾ ਵਿਆਪਕ ਸੁਰੱਖਿਆ ਪ੍ਰਦਾਨ ਕਰਦਾ ਹੈ। ਹਾਲਾਂਕਿ, ਇਸ ਮੁੱਲ ਨੂੰ ਘਟਾਉਣ ਨਾਲ ਸੁਰੱਖਿਆ ਦੀ ਹੱਦ ਘੱਟ ਹੋ ਸਕਦੀ ਹੈ ਜਦੋਂ ਕਿ ਸੰਭਾਵਿਤ ਤੌਰ 'ਤੇ ਸੂਚਕਪ੍ਰਭਾਵ ਨੂੰ ਘਟਾਇਆ ਜਾ ਸਕਦਾ ਹੈ.",
+ "Pitch extraction algorithm": "ਪਿਚ ਐਕਸਟਰੈਕਸ਼ਨ ਐਲਗੋਰਿਦਮ",
+ "Pitch extraction algorithm to use for the audio conversion. The default algorithm is rmvpe, which is recommended for most cases.": "ਆਡੀਓ ਪਰਿਵਰਤਨ ਲਈ ਵਰਤਣ ਲਈ ਪਿਚ ਐਕਸਟਰੈਕਸ਼ਨ ਐਲਗੋਰਿਦਮ. ਡਿਫਾਲਟ ਐਲਗੋਰਿਦਮ rmvpe ਹੈ, ਜਿਸਦੀ ਸਿਫਾਰਸ਼ ਜ਼ਿਆਦਾਤਰ ਮਾਮਲਿਆਂ ਲਈ ਕੀਤੀ ਜਾਂਦੀ ਹੈ।",
+ "Convert": "ਕਨਵਰਟ ਕਰੋ",
+ "Export Audio": "ਆਡੀਓ ਨਿਰਯਾਤ ਕਰੋ",
+ "Batch": "ਬੈਚ",
+ "Input Folder": "ਇਨਪੁੱਟ ਫੋਲਡਰ",
+ "Select the folder containing the audios to convert.": "ਬਦਲਣ ਲਈ ਆਡੀਓ ਵਾਲੇ ਫੋਲਡਰ ਦੀ ਚੋਣ ਕਰੋ।",
+ "Enter input path": "ਇਨਪੁੱਟ ਪਾਥ ਦਾਖਲ ਕਰੋ",
+ "Output Folder": "ਆਊਟਪੁੱਟ ਫੋਲਡਰ",
+ "Select the folder where the output audios will be saved.": "ਉਹ ਫੋਲਡਰ ਚੁਣੋ ਜਿੱਥੇ ਆਉਟਪੁੱਟ ਆਡੀਓ ਸੁਰੱਖਿਅਤ ਕੀਤੇ ਜਾਣਗੇ।",
+ "Enter output path": "ਆਊਟਪੁੱਟ ਪਾਥ ਦਾਖਲ ਕਰੋ",
+ "Get information about the audio": "ਆਡੀਓ ਬਾਰੇ ਜਾਣਕਾਰੀ ਪ੍ਰਾਪਤ ਕਰੋ",
+ "Information about the audio file": "ਆਡੀਓ ਫਾਇਲ ਬਾਰੇ ਜਾਣਕਾਰੀ",
+ "Waiting for information...": "ਜਾਣਕਾਰੀ ਦੀ ਉਡੀਕ ਕੀਤੀ ਜਾ ਰਹੀ ਹੈ...",
+ "## Voice Blender": "## ਵੌਇਸ ਬਲੇਂਡਰ",
+ "Select two voice models, set your desired blend percentage, and blend them into an entirely new voice.": "ਦੋ ਵੌਇਸ ਮਾਡਲਾਂ ਦੀ ਚੋਣ ਕਰੋ, ਆਪਣੀ ਲੋੜੀਂਦੀ ਮਿਸ਼ਰਣ ਪ੍ਰਤੀਸ਼ਤਤਾ ਸੈੱਟ ਕਰੋ, ਅਤੇ ਉਨ੍ਹਾਂ ਨੂੰ ਪੂਰੀ ਤਰ੍ਹਾਂ ਨਵੀਂ ਆਵਾਜ਼ ਵਿੱਚ ਮਿਲਾਓ.",
+ "Voice Blender": "ਵੌਇਸ ਬਲੇਂਡਰ",
+ "Drag and drop your model here": "ਆਪਣੇ ਮਾਡਲ ਨੂੰ ਇੱਥੇ ਖਿੱਚ ਕੇ ਛੱਡ ਦਿਓ",
+ "You can also use a custom path.": "ਤੁਸੀਂ ਇੱਕ ਕਸਟਮ ਪਾਥ ਦੀ ਵਰਤੋਂ ਵੀ ਕਰ ਸਕਦੇ ਹੋ।",
+ "Blend Ratio": "ਮਿਸ਼ਰਣ ਅਨੁਪਾਤ",
+ "Adjusting the position more towards one side or the other will make the model more similar to the first or second.": "ਸਥਿਤੀ ਨੂੰ ਇੱਕ ਪਾਸੇ ਜਾਂ ਦੂਜੇ ਪਾਸੇ ਵਧੇਰੇ ਵਿਵਸਥਿਤ ਕਰਨਾ ਮਾਡਲ ਨੂੰ ਪਹਿਲੇ ਜਾਂ ਦੂਜੇ ਦੇ ਸਮਾਨ ਬਣਾ ਦੇਵੇਗਾ।",
+ "Fusion": "ਫਿਊਜ਼ਨ",
+ "Path to Model": "ਮਾਡਲ ਲਈ ਰਾਹ",
+ "Enter path to model": "ਮਾਡਲ ਲਈ ਪਾਥ ਦਾਖਲ ਕਰੋ",
+ "Model information to be placed": "ਮਾਡਲ ਜਾਣਕਾਰੀ ਰੱਖੀ ਜਾਣੀ ਚਾਹੀਦੀ ਹੈ",
+ "Inroduce the model information": "ਮਾਡਲ ਜਾਣਕਾਰੀ ਨੂੰ ਪੇਸ਼ ਕਰੋ",
+ "The information to be placed in the model (You can leave it blank or put anything).": "ਮਾਡਲ ਵਿੱਚ ਰੱਖੀ ਜਾਣ ਵਾਲੀ ਜਾਣਕਾਰੀ (ਤੁਸੀਂ ਇਸ ਨੂੰ ਖਾਲੀ ਛੱਡ ਸਕਦੇ ਹੋ ਜਾਂ ਕੁਝ ਵੀ ਪਾ ਸਕਦੇ ਹੋ)।",
+ "View model information": "ਮਾਡਲ ਜਾਣਕਾਰੀ ਦੇਖੋ",
+ "Introduce the model pth path": "ਮਾਡਲ pth ਪਾਥ ਨੂੰ ਪੇਸ਼ ਕਰੋ",
+ "View": "ਦ੍ਰਿਸ਼",
+ "Model extraction": "ਮਾਡਲ ਕੱਢਣਾ",
+ "Model conversion": "ਮਾਡਲ ਪਰਿਵਰਤਨ",
+ "Pth file": "Pth ਫਾਇਲ",
+ "Output of the pth file": "pth ਫਾਇਲ ਦਾ ਆਊਟਪੁੱਟ",
+ "# How to Report an Issue on GitHub": "# GitHub 'ਤੇ ਕਿਸੇ ਮੁੱਦੇ ਦੀ ਰਿਪੋਰਟ ਕਿਵੇਂ ਕਰਨੀ ਹੈ",
+ "1. Click on the 'Record Screen' button below to start recording the issue you are experiencing.": "1. ਤੁਹਾਡੇ ਵੱਲੋਂ ਅਨੁਭਵ ਕੀਤੀ ਜਾ ਰਹੀ ਸਮੱਸਿਆ ਨੂੰ ਰਿਕਾਰਡ ਕਰਨਾ ਸ਼ੁਰੂ ਕਰਨ ਲਈ ਹੇਠਾਂ ਦਿੱਤੇ 'ਰਿਕਾਰਡ ਸਕ੍ਰੀਨ' ਬਟਨ 'ਤੇ ਕਲਿੱਕ ਕਰੋ।",
+ "2. Once you have finished recording the issue, click on the 'Stop Recording' button (the same button, but the label changes depending on whether you are actively recording or not).": "2. ਇੱਕ ਵਾਰ ਜਦੋਂ ਤੁਸੀਂ ਸਮੱਸਿਆ ਨੂੰ ਰਿਕਾਰਡ ਕਰਨਾ ਪੂਰਾ ਕਰ ਲੈਂਦੇ ਹੋ, ਤਾਂ 'ਸਟਾਪ ਰਿਕਾਰਡਿੰਗ' ਬਟਨ 'ਤੇ ਕਲਿੱਕ ਕਰੋ (ਉਹੀ ਬਟਨ, ਪਰ ਲੇਬਲ ਇਸ ਗੱਲ 'ਤੇ ਨਿਰਭਰ ਕਰਦਾ ਹੈ ਕਿ ਤੁਸੀਂ ਸਰਗਰਮੀ ਨਾਲ ਰਿਕਾਰਡਿੰਗ ਕਰ ਰਹੇ ਹੋ ਜਾਂ ਨਹੀਂ)।",
+ "3. Go to [GitHub Issues](https://github.com/IAHispano/Applio/issues) and click on the 'New Issue' button.": "3. [GitHub ਮੁੱਦੇ] (https://github.com/IAHispano/Applio/issues) 'ਤੇ ਜਾਓ ਅਤੇ 'ਨਵਾਂ ਮੁੱਦਾ' ਬਟਨ 'ਤੇ ਕਲਿੱਕ ਕਰੋ।",
+ "4. Complete the provided issue template, ensuring to include details as needed, and utilize the assets section to upload the recorded file from the previous step.": "4. ਪ੍ਰਦਾਨ ਕੀਤੇ ਮੁੱਦੇ ਦੇ ਟੈਂਪਲੇਟ ਨੂੰ ਪੂਰਾ ਕਰੋ, ਲੋੜ ਅਨੁਸਾਰ ਵੇਰਵੇ ਸ਼ਾਮਲ ਕਰਨਾ ਯਕੀਨੀ ਬਣਾਓ, ਅਤੇ ਪਿਛਲੇ ਕਦਮ ਤੋਂ ਰਿਕਾਰਡ ਕੀਤੀ ਫਾਈਲ ਨੂੰ ਅੱਪਲੋਡ ਕਰਨ ਲਈ ਸੰਪਤੀ ਸੈਕਸ਼ਨ ਦੀ ਵਰਤੋਂ ਕਰੋ.",
+ "Record Screen": "ਰਿਕਾਰਡ ਸਕ੍ਰੀਨ",
+ "Record": "ਰਿਕਾਰਡ",
+ "Stop Recording": "ਰਿਕਾਰਡਿੰਗ ਬੰਦ ਕਰੋ",
+ "Introduce the model .pth path": "ਮਾਡਲ .pth path ਨੂੰ ਪੇਸ਼ ਕਰੋ",
+ "See Model Information": "ਮਾਡਲ ਜਾਣਕਾਰੀ ਦੇਖੋ",
+ "## Download Model": "## ਡਾਊਨਲੋਡ ਮਾਡਲ",
+ "Model Link": "ਮਾਡਲ ਲਿੰਕ",
+ "Introduce the model link": "ਮਾਡਲ ਲਿੰਕ ਪੇਸ਼ ਕਰੋ",
+ "Download Model": "ਮਾਡਲ ਡਾਊਨਲੋਡ ਕਰੋ",
+ "## Drop files": "## ਫਾਇਲਾਂ ਛੱਡੋ",
+ "Drag your .pth file and .index file into this space. Drag one and then the other.": "ਆਪਣੀ .pth ਫਾਇਲ ਅਤੇ .index ਫਾਇਲ ਨੂੰ ਇਸ ਸਪੇਸ ਵਿੱਚ ਖਿੱਚੋ। ਇੱਕ ਨੂੰ ਖਿੱਚੋ ਅਤੇ ਫਿਰ ਦੂਜੇ ਨੂੰ।",
+ "TTS Voices": "TTS ਆਵਾਜ਼ਾਂ",
+ "Select the TTS voice to use for the conversion.": "ਪਰਿਵਰਤਨ ਲਈ ਵਰਤਣ ਲਈ TTS ਆਵਾਜ਼ ਦੀ ਚੋਣ ਕਰੋ।",
+ "Text to Synthesize": "ਸੰਸ਼ਲੇਸ਼ਣ ਕਰਨ ਲਈ ਟੈਕਸਟ",
+ "Enter the text to synthesize.": "ਸੰਸ਼ਲੇਸ਼ਿਤ ਕਰਨ ਲਈ ਟੈਕਸਟ ਦਾਖਲ ਕਰੋ।",
+ "Or you can upload a .txt file": "ਜਾਂ ਤੁਸੀਂ ਕੋਈ .txt ਫਾਇਲ ਅੱਪਲੋਡ ਕਰ ਸਕਦੇ ਹੋ",
+ "Enter text to synthesize": "ਸੰਸ਼ਲੇਸ਼ਿਤ ਕਰਨ ਲਈ ਟੈਕਸਟ ਦਾਖਲ ਕਰੋ",
+ "Output Path for TTS Audio": "TTS ਆਡੀਓ ਲਈ ਆਉਟਪੁੱਟ ਪਾਥ",
+ "Output Path for RVC Audio": "RVC ਆਡੀਓ ਲਈ ਆਉਟਪੁੱਟ ਪਾਥ",
+ "Enable Applio integration with Discord presence": "ਡਿਸਕਾਰਡ ਦੀ ਮੌਜੂਦਗੀ ਨਾਲ ਐਪਲੀਓ ਏਕੀਕਰਣ ਨੂੰ ਸਮਰੱਥ ਕਰੋ",
+ "It will activate the possibility of displaying the current Applio activity in Discord.": "ਇਹ ਡਿਸਕਾਰਡ ਵਿੱਚ ਮੌਜੂਦਾ ਐਪਲੀਓ ਗਤੀਵਿਧੀ ਨੂੰ ਪ੍ਰਦਰਸ਼ਿਤ ਕਰਨ ਦੀ ਸੰਭਾਵਨਾ ਨੂੰ ਕਿਰਿਆਸ਼ੀਲ ਕਰੇਗਾ।",
+ "Enable Applio integration with applio.org/models using flask": "ਫਲਾਸਕ ਦੀ ਵਰਤੋਂ ਕਰਕੇ applio.org/models ਨਾਲ ਐਪਲਿਓ ਏਕੀਕਰਣ ਨੂੰ ਸਮਰੱਥ ਕਰੋ",
+ "It will activate the possibility of downloading models with a click from the website.": "ਇਹ ਵੈਬਸਾਈਟ ਤੋਂ ਇੱਕ ਕਲਿੱਕ ਨਾਲ ਮਾਡਲਾਂ ਨੂੰ ਡਾਊਨਲੋਡ ਕਰਨ ਦੀ ਸੰਭਾਵਨਾ ਨੂੰ ਕਿਰਿਆਸ਼ੀਲ ਕਰੇਗਾ।",
+ "Theme": "ਥੀਮ",
+ "Select the theme you want to use. (Requires restarting Applio)": "ਉਹ ਥੀਮ ਚੁਣੋ ਜਿਸਨੂੰ ਤੁਸੀਂ ਵਰਤਣਾ ਚਾਹੁੰਦੇ ਹੋ। (ਐਪਲੀਕੇਸ਼ਨ ਨੂੰ ਦੁਬਾਰਾ ਸ਼ੁਰੂ ਕਰਨ ਦੀ ਲੋੜ ਹੈ)",
+ "Language": "ਭਾਸ਼ਾ",
+ "Select the language you want to use. (Requires restarting Applio)": "ਉਹ ਭਾਸ਼ਾ ਚੁਣੋ ਜਿਸਨੂੰ ਤੁਸੀਂ ਵਰਤਣਾ ਚਾਹੁੰਦੇ ਹੋ। (ਐਪਲੀਕੇਸ਼ਨ ਨੂੰ ਦੁਬਾਰਾ ਸ਼ੁਰੂ ਕਰਨ ਦੀ ਲੋੜ ਹੈ)",
+ "Plugin Installer": "ਪਲੱਗਇਨ ਇੰਸਟਾਲਰ",
+ "Drag your plugin.zip to install it": "ਇਸ ਨੂੰ ਇੰਸਟਾਲ ਕਰਨ ਲਈ ਆਪਣੇ plugin.zip ਨੂੰ ਖਿੱਚੋ",
+ "Version Checker": "ਸੰਸਕਰਣ ਚੈਕਰ",
+ "Check which version of Applio is the latest to see if you need to update.": "ਇਹ ਦੇਖਣ ਲਈ ਜਾਂਚ ਕਰੋ ਕਿ ਕੀ ਤੁਹਾਨੂੰ ਅੱਪਡੇਟ ਕਰਨ ਦੀ ਲੋੜ ਹੈ, ਐਪਲੀਓ ਦਾ ਕਿਹੜਾ ਸੰਸਕਰਣ ਨਵੀਨਤਮ ਹੈ।",
+ "Check for updates": "ਅੱਪਡੇਟਾਂ ਵਾਸਤੇ ਜਾਂਚ ਕਰੋ"
+}
\ No newline at end of file
diff --git a/assets/i18n/languages/pl_PL.json b/assets/i18n/languages/pl_PL.json
new file mode 100644
index 0000000000000000000000000000000000000000..09c14ee62c9b34a0fad157495c4845d881accd54
--- /dev/null
+++ b/assets/i18n/languages/pl_PL.json
@@ -0,0 +1,175 @@
+{
+ "VITS-based Voice Conversion focused on simplicity, quality and performance.": "Najlepsze narzędzie do klonowania głosu, skrupulatnie zoptymalizowane pod kątem niezrównanej mocy, modułowości i przyjazności dla użytkownika.",
+ "This section contains some extra utilities that often may be in experimental phases.": "Ta sekcja zawiera kilka dodatkowych narzędzi, które często mogą znajdować się w fazie eksperymentalnej.",
+ "Output Information": "Informacje wyjściowe",
+ "The output information will be displayed here.": "W tym miejscu zostaną wyświetlone informacje wyjściowe.",
+ "Inference": "Wnioskowanie",
+ "Train": "Pociąg",
+ "Extra": "Dodatkowych",
+ "Merge Audios": "Scal audio",
+ "Processing": "Przetwarzanie",
+ "Audio Analyzer": "Analizator dźwięku",
+ "Model Information": "Informacje o modelu",
+ "Plugins": "Wtyczki",
+ "Download": "Pobierać",
+ "Report a Bug": "Zgłoś błąd",
+ "Settings": "Ustawienia",
+ "Preprocess": "Przetwarzanie wstępne",
+ "Model Name": "Nazwa modelu",
+ "Name of the new model.": "Nazwa nowego modelu.",
+ "Enter model name": "Wprowadź nazwę modelu",
+ "Dataset Path": "Ścieżka zestawu danych",
+ "Path to the dataset folder.": "Ścieżka do folderu zestawu danych.",
+ "Refresh Datasets": "Odświeżanie zestawów danych",
+ "Dataset Creator": "Twórca zestawu danych",
+ "Dataset Name": "Nazwa zestawu danych",
+ "Name of the new dataset.": "Nazwa nowego zestawu danych.",
+ "Enter dataset name": "Wprowadź nazwę zestawu danych",
+ "Upload Audio Dataset": "Przekazywanie zestawu danych audio",
+ "The audio file has been successfully added to the dataset. Please click the preprocess button.": "Plik audio został pomyślnie dodany do zestawu danych. Kliknij przycisk przetwarzania wstępnego.",
+ "Enter dataset path": "Wprowadź ścieżkę zestawu danych",
+ "Sampling Rate": "Częstotliwość próbkowania",
+ "The sampling rate of the audio files.": "Częstotliwość próbkowania plików audio.",
+ "Model Architecture": "Wersja RVC",
+ "Version of the model architecture.": "Wersja modelu RVC.",
+ "Preprocess Dataset": "Wstępne przetwarzanie zestawu danych",
+ "Extract": "Ekstrakt",
+ "Hop Length": "Długość chmielu",
+ "Denotes the duration it takes for the system to transition to a significant pitch change. Smaller hop lengths require more time for inference but tend to yield higher pitch accuracy.": "Oznacza czas potrzebny systemowi do przejścia do znaczącej zmiany wysokości dźwięku. Mniejsze długości przeskoków wymagają więcej czasu na wnioskowanie, ale zwykle zapewniają wyższą dokładność skoku.",
+ "Batch Size": "Wielkość partii",
+ "It's advisable to align it with the available VRAM of your GPU. A setting of 4 offers improved accuracy but slower processing, while 8 provides faster and standard results.": "Wskazane jest, aby dostosować go do dostępnej pamięci VRAM procesora graficznego. Ustawienie 4 zapewnia lepszą dokładność, ale wolniejsze przetwarzanie, podczas gdy 8 zapewnia szybsze i standardowe wyniki.",
+ "Save Every Epoch": "Ocal każdą epokę",
+ "Determine at how many epochs the model will saved at.": "Określ, w ilu epokach model zostanie zapisany.",
+ "Total Epoch": "Epoka ogółem",
+ "Specifies the overall quantity of epochs for the model training process.": "Określa całkowitą liczbę epok dla procesu trenowania modelu.",
+ "Pretrained": "Wstępnie wytrenowany",
+ "Save Only Latest": "Zapisz tylko najnowsze",
+ "Enabling this setting will result in the G and D files saving only their most recent versions, effectively conserving storage space.": "Włączenie tego ustawienia spowoduje, że pliki G i D będą zapisywać tylko swoje najnowsze wersje, skutecznie oszczędzając miejsce na dysku.",
+ "Save Every Weights": "Oszczędzaj wszystkie ciężary",
+ "This setting enables you to save the weights of the model at the conclusion of each epoch.": "To ustawienie umożliwia zapisanie wag modelu na końcu każdej epoki.",
+ "Custom Pretrained": "Niestandardowe wstępnie wytrenowane",
+ "Utilizing custom pretrained models can lead to superior results, as selecting the most suitable pretrained models tailored to the specific use case can significantly enhance performance.": "Korzystanie z niestandardowych wstępnie wytrenowanych modeli może prowadzić do doskonałych wyników, ponieważ wybranie najbardziej odpowiednich wstępnie wytrenowanych modeli dostosowanych do konkretnego przypadku użycia może znacznie zwiększyć wydajność.",
+ "Upload Pretrained Model": "Przekazywanie wstępnie wytrenowanego modelu",
+ "Refresh Custom Pretraineds": "Odświeżanie niestandardowych wstępnie wytrenowanych",
+ "Pretrained Custom Settings": "Wstępnie wytrenowane ustawienia niestandardowe",
+ "The file you dropped is not a valid pretrained file. Please try again.": "Upuszczony plik nie jest prawidłowym wstępnie wytrenowanym plikiem. Spróbuj ponownie.",
+ "Click the refresh button to see the pretrained file in the dropdown menu.": "Kliknij przycisk odświeżania, aby wyświetlić wstępnie wytrenowany plik w menu rozwijanym.",
+ "Pretrained G Path": "Niestandardowe wstępnie wytrenowane G",
+ "Pretrained D Path": "Niestandardowy wstępnie wytrenowany D",
+ "GPU Settings": "Ustawienia GPU",
+ "Sets advanced GPU settings, recommended for users with better GPU architecture.": "Ustawia zaawansowane ustawienia GPU, zalecane dla użytkowników z lepszą architekturą GPU.",
+ "GPU Custom Settings": "Niestandardowe ustawienia GPU",
+ "GPU Number": "Numer GPU",
+ "0 to ∞ separated by -": "Od 0 do ∞ oddzielone -",
+ "GPU Information": "Informacje o procesorze GPU",
+ "Pitch Guidance": "Wskazówki dotyczące wysokości dźwięku",
+ "By employing pitch guidance, it becomes feasible to mirror the intonation of the original voice, including its pitch. This feature is particularly valuable for singing and other scenarios where preserving the original melody or pitch pattern is essential.": "Stosując wskazówki dotyczące wysokości dźwięku, możliwe staje się odzwierciedlenie intonacji oryginalnego głosu, w tym jego wysokości. Ta funkcja jest szczególnie cenna w przypadku śpiewu i innych scenariuszy, w których niezbędne jest zachowanie oryginalnej melodii lub wzoru wysokości dźwięku.",
+ "Utilize pretrained models when training your own. This approach reduces training duration and enhances overall quality.": "Korzystaj ze wstępnie wytrenowanych modeli podczas trenowania własnych. Takie podejście skraca czas trwania szkolenia i poprawia ogólną jakość.",
+ "Extract Features": "Wyodrębnij funkcje",
+ "Start Training": "Rozpocznij szkolenie",
+ "Generate Index": "Generuj indeks",
+ "Voice Model": "Model głosu",
+ "Select the voice model to use for the conversion.": "Wybierz model głosu, który ma zostać użyty do konwersji.",
+ "Index File": "Plik indeksu",
+ "Select the index file to use for the conversion.": "Wybierz plik indeksu, który ma zostać użyty do konwersji.",
+ "Refresh": "Odświeżyć",
+ "Unload Voice": "Uwolnij głos",
+ "Single": "Pojedynczy",
+ "Upload Audio": "Prześlij dźwięk",
+ "Select Audio": "Wybierz Audio (Dźwięk)",
+ "Select the audio to convert.": "Wybierz dźwięk do konwersji.",
+ "Advanced Settings": "Ustawienia zaawansowane",
+ "Clear Outputs (Deletes all audios in assets/audios)": "Wyczyść wyjścia (usuwa wszystkie pliki audio w zasobach/plikach audio)",
+ "Custom Output Path": "Niestandardowa ścieżka wyjściowa",
+ "Output Path": "Ścieżka wyjściowa",
+ "The path where the output audio will be saved, by default in assets/audios/output.wav": "Ścieżka, w której zostanie zapisany dźwięk wyjściowy, domyślnie w assets/audios/output.wav",
+ "Split Audio": "Podziel dźwięk",
+ "Split the audio into chunks for inference to obtain better results in some cases.": "Podziel dźwięk na fragmenty w celu wnioskowania, aby w niektórych przypadkach uzyskać lepsze wyniki.",
+ "Autotune": "Automatyczne dostrajanie",
+ "Apply a soft autotune to your inferences, recommended for singing conversions.": "Zastosuj miękkie autotune do swoich wniosków, zalecane do konwersji śpiewu.",
+ "Clean Audio": "Czysty dźwięk",
+ "Clean your audio output using noise detection algorithms, recommended for speaking audios.": "Wyczyść wyjście audio za pomocą algorytmów wykrywania szumów, zalecanych do mówienia audio.",
+ "Clean Strength": "Czysta siła",
+ "Set the clean-up level to the audio you want, the more you increase it the more it will clean up, but it is possible that the audio will be more compressed.": "Ustaw żądany poziom czyszczenia na żądany dźwięk, im bardziej go zwiększysz, tym bardziej się oczyści, ale możliwe, że dźwięk będzie bardziej skompresowany.",
+ "Pitch": "Rzucać",
+ "Set the pitch of the audio, the higher the value, the higher the pitch.": "Ustaw wysokość dźwięku, im wyższa wartość, tym wyższa wysokość.",
+ "Filter Radius": "Promień filtra",
+ "If the number is greater than or equal to three, employing median filtering on the collected tone results has the potential to decrease respiration.": "Jeśli liczba ta jest większa lub równa trzem, zastosowanie filtrowania mediany na zebranych wynikach tonów może potencjalnie zmniejszyć oddychanie.",
+ "Search Feature Ratio": "Współczynnik funkcji wyszukiwania",
+ "Influence exerted by the index file; a higher value corresponds to greater influence. However, opting for lower values can help mitigate artifacts present in the audio.": "Wpływ wywierany przez plik indeksu; Wyższa wartość odpowiada większemu wpływowi. Jednak wybranie niższych wartości może pomóc w ograniczeniu artefaktów obecnych w dźwięku.",
+ "Volume Envelope": "Koperta objętości",
+ "Substitute or blend with the volume envelope of the output. The closer the ratio is to 1, the more the output envelope is employed.": "Zastąp lub zmieszaj z obwiednią głośności wyjścia. Im współczynnik jest bliższy 1, tym bardziej wykorzystywana jest obwiednia wyjściowa.",
+ "Protect Voiceless Consonants": "Chroń bezdźwięczne spółgłoski",
+ "Safeguard distinct consonants and breathing sounds to prevent electro-acoustic tearing and other artifacts. Pulling the parameter to its maximum value of 0.5 offers comprehensive protection. However, reducing this value might decrease the extent of protection while potentially mitigating the indexing effect.": "Chroń wyraźne spółgłoski i dźwięki oddychania, aby zapobiec rozdarciu elektroakustycznemu i innym artefaktom. Pociągnięcie parametru do maksymalnej wartości 0,5 zapewnia kompleksową ochronę. Jednak zmniejszenie tej wartości może zmniejszyć zakres ochrony, jednocześnie potencjalnie łagodząc efekt indeksowania.",
+ "Pitch extraction algorithm": "Algorytm ekstrakcji wysokości dźwięku",
+ "Pitch extraction algorithm to use for the audio conversion. The default algorithm is rmvpe, which is recommended for most cases.": "Algorytm ekstrakcji wysokości dźwięku do użycia do konwersji dźwięku. Domyślnym algorytmem jest rmvpe, który jest zalecany w większości przypadków.",
+ "Convert": "Nawrócić",
+ "Export Audio": "Eksportuj dźwięk",
+ "Batch": "Partia",
+ "Input Folder": "Folder wejściowy",
+ "Select the folder containing the audios to convert.": "Wybierz folder zawierający pliki audio do konwersji.",
+ "Enter input path": "Wprowadź ścieżkę wejściową",
+ "Output Folder": "Folder wyjściowy",
+ "Select the folder where the output audios will be saved.": "Wybierz folder, w którym zostaną zapisane wyjściowe pliki audio.",
+ "Enter output path": "Wprowadź ścieżkę wyjściową",
+ "Get information about the audio": "Uzyskiwanie informacji o dźwięku",
+ "Information about the audio file": "Informacje o pliku audio",
+ "Waiting for information...": "Czekam na informację...",
+ "## Voice Blender": "## Blender głosowy",
+ "Select two voice models, set your desired blend percentage, and blend them into an entirely new voice.": "Wybierz dwa modele brzmienia, ustaw żądany procent mieszania i połącz je w zupełnie nowy głos.",
+ "Voice Blender": "Blender głosu",
+ "Drag and drop your model here": "Przeciągnij i upuść swój model tutaj",
+ "You can also use a custom path.": "Możesz również użyć ścieżki niestandardowej.",
+ "Blend Ratio": "Proporcje mieszania",
+ "Adjusting the position more towards one side or the other will make the model more similar to the first or second.": "Dostosowanie pozycji bardziej w jedną lub drugą stronę sprawi, że model będzie bardziej podobny do pierwszego lub drugiego.",
+ "Fusion": "Fuzja",
+ "Path to Model": "Ścieżka do modelu",
+ "Enter path to model": "Wprowadź ścieżkę do modelu",
+ "Model information to be placed": "Informacje o modelu, które mają zostać umieszczone",
+ "Inroduce the model information": "Zapoznaj się z informacjami o modelu",
+ "The information to be placed in the model (You can leave it blank or put anything).": "Informacje, które mają zostać umieszczone w modelu (możesz pozostawić je puste lub umieścić cokolwiek).",
+ "View model information": "Wyświetlanie informacji o modelu",
+ "Introduce the model pth path": "Wprowadzenie ścieżki pth modelu",
+ "View": "Widok",
+ "Model extraction": "Wyodrębnianie modelu",
+ "Model conversion": "Konwersja modelu",
+ "Pth file": "P-ty plik",
+ "Output of the pth file": "Wyjście pliku pth",
+ "# How to Report an Issue on GitHub": "# Jak zgłosić problem na GitHub",
+ "1. Click on the 'Record Screen' button below to start recording the issue you are experiencing.": "1. Kliknij przycisk \"Ekran nagrywania\" poniżej, aby rozpocząć nagrywanie napotkanego problemu.",
+ "2. Once you have finished recording the issue, click on the 'Stop Recording' button (the same button, but the label changes depending on whether you are actively recording or not).": "2. Po zakończeniu nagrywania problemu kliknij przycisk \"Zatrzymaj nagrywanie\" (ten sam przycisk, ale etykieta zmienia się w zależności od tego, czy aktywnie nagrywasz, czy nie).",
+ "3. Go to [GitHub Issues](https://github.com/IAHispano/Applio/issues) and click on the 'New Issue' button.": "3. Przejdź do [GitHub Issues](https://github.com/IAHispano/Applio/issues) i kliknij przycisk \"Nowe zgłoszenie\".",
+ "4. Complete the provided issue template, ensuring to include details as needed, and utilize the assets section to upload the recorded file from the previous step.": "4. Wypełnij dostarczony szablon problemu, upewniając się, że w razie potrzeby dołączyłeś szczegóły, i skorzystaj z sekcji zasobów, aby przesłać nagrany plik z poprzedniego kroku.",
+ "Record Screen": "Ekran nagrywania",
+ "Record": "Rekord",
+ "Stop Recording": "Zatrzymaj nagrywanie",
+ "Introduce the model .pth path": "Wprowadzenie ścieżki pth modelu",
+ "See Model Information": "Zobacz informacje o modelu",
+ "## Download Model": "## Pobierz model",
+ "Model Link": "Link do modelu",
+ "Introduce the model link": "Wprowadzenie linku do modelu",
+ "Download Model": "Pobierz model",
+ "## Drop files": "## Upuść pliki",
+ "Drag your .pth file and .index file into this space. Drag one and then the other.": "Przeciągnij plik .pth i plik .index do tego miejsca. Przeciągnij jedną, a potem drugą.",
+ "TTS Voices": "Głosy TTS",
+ "Select the TTS voice to use for the conversion.": "Wybierz głos TTS, który ma być używany do konwersji.",
+ "Text to Synthesize": "Tekst do syntezy",
+ "Enter the text to synthesize.": "Wprowadź tekst do syntezy.",
+ "Or you can upload a .txt file": "Możesz też przesłać plik .txt",
+ "Enter text to synthesize": "Wprowadzanie tekstu do syntezy",
+ "Output Path for TTS Audio": "Ścieżka wyjściowa dla TTS Audio",
+ "Output Path for RVC Audio": "Ścieżka wyjściowa dla dźwięku RVC",
+ "Enable Applio integration with Discord presence": "Włącz integrację Applio z obecnością Discord",
+ "It will activate the possibility of displaying the current Applio activity in Discord.": "Aktywuje możliwość wyświetlania aktualnej aktywności Applio w Discordzie.",
+ "Enable Applio integration with applio.org/models using flask": "Włączanie integracji aplikacji Applio z applio.org/models przy użyciu kolby",
+ "It will activate the possibility of downloading models with a click from the website.": "Aktywuje możliwość pobierania modeli jednym kliknięciem ze strony internetowej.",
+ "Theme": "Kompozycja",
+ "Select the theme you want to use. (Requires restarting Applio)": "Wybierz motyw, którego chcesz użyć. (Wymaga ponownego uruchomienia aplikacji)",
+ "Language": "Język",
+ "Select the language you want to use. (Requires restarting Applio)": "Wybierz język, którego chcesz używać. (Wymaga ponownego uruchomienia aplikacji)",
+ "Plugin Installer": "Instalator wtyczek",
+ "Drag your plugin.zip to install it": "Przeciągnij plugin.zip, aby go zainstalować",
+ "Version Checker": "Sprawdzanie wersji",
+ "Check which version of Applio is the latest to see if you need to update.": "Sprawdź, która wersja Applio jest najnowsza, aby sprawdzić, czy musisz ją zaktualizować.",
+ "Check for updates": "Sprawdź dostępność aktualizacji"
+}
\ No newline at end of file
diff --git a/assets/i18n/languages/pt_BR.json b/assets/i18n/languages/pt_BR.json
new file mode 100644
index 0000000000000000000000000000000000000000..d61efdf04d18a231dedb159a9d7e753191abfc88
--- /dev/null
+++ b/assets/i18n/languages/pt_BR.json
@@ -0,0 +1,308 @@
+{
+ "VITS-based Voice Conversion focused on simplicity, quality and performance.": "Conversão de Voz baseada em VITS focada em simplicidade, qualidade e desempenho.",
+ "This section contains some extra utilities that often may be in experimental phases.": "Esta seção contém algumas utilidades extras que muitas vezes podem estar em fases experimentais.",
+ "Output Information": "Informações de Saída",
+ "The output information will be displayed here.": "As informações de saída serão exibidas aqui.",
+ "Inference": "Inferência",
+ "Train": "Treinar",
+ "Extra": "Extra",
+ "Merge Audios": "Mesclar Áudios",
+ "Processing": "Processando",
+ "Audio Analyzer": "Analisador de Áudio",
+ "Model Information": "Informações do Modelo",
+ "Plugins": "Plugins",
+ "Download": "Baixar",
+ "Report a Bug": "Reportar um Bug",
+ "Settings": "Configurações",
+ "Preprocess": "Pré-processar",
+ "Audio cutting": "Corte de Áudio",
+ "It's recommended to deactivate this option if your dataset has already been processed.": "Recomenda-se desativar esta opção se seu dataset já foi processado.",
+ "Process effects": "Processar efeitos",
+ "Model Name": "Nome do Modelo",
+ "Name of the new model.": "Nome do novo modelo.",
+ "Enter model name": "Digite o nome do modelo",
+ "Dataset Path": "Caminho do dataset",
+ "Path to the dataset folder.": "Caminho para a pasta do dataset.",
+ "Refresh Datasets": "Atualizar datasets",
+ "Dataset Creator": "Criador de dataset",
+ "Dataset Name": "Nome do dataset",
+ "Name of the new dataset.": "Nome do novo dataset.",
+ "Enter dataset name": "Digite o nome do dataset",
+ "Upload Audio Dataset": "Carregar dataset de Áudio",
+ "The audio file has been successfully added to the dataset. Please click the preprocess button.": "O arquivo de áudio foi adicionado com sucesso ao dataset. Por favor, clique no botão de pré-processamento.",
+ "Enter dataset path": "Digite o caminho do dataset",
+ "Sampling Rate": "Sampling Rate",
+ "The sampling rate of the audio files.": "O sampling rate dos arquivos de áudio.",
+ "Model Architecture": "Versão do RVC",
+ "Version of the model architecture.": "A versão do RVC do modelo.",
+ "Preprocess Dataset": "Pré-processar dataset",
+ "Embedder Model": "Modelo de Embedding",
+ "Model used for learning speaker embedding.": "Modelo usado para aprender a incorporação do locutor.",
+ "Extract": "Extrair",
+ "Hop Length": "Hop Length",
+ "Denotes the duration it takes for the system to transition to a significant pitch change. Smaller hop lengths require more time for inference but tend to yield higher pitch accuracy.": "Denota a duração que o sistema leva para transitar para uma mudança significativa de tom. Comprimentos de salto menores requerem mais tempo para inferência, mas tendem a proporcionar maior precisão de tom.",
+ "Batch Size": "Tamanho do Lote",
+ "It's advisable to align it with the available VRAM of your GPU. A setting of 4 offers improved accuracy but slower processing, while 8 provides faster and standard results.": "É aconselhável alinhá-lo com a VRAM disponível da sua GPU. Uma configuração de 4 oferece maior precisão, mas processamento mais lento, enquanto 8 proporciona resultados mais rápidos e padrão.",
+ "Save Every Epoch": "Salvar a Cada Epochs",
+ "Determine at how many epochs the model will saved at.": "Determine em quantas epochs o modelo será salvo.",
+ "Total Epoch": "Total de Epochs",
+ "Specifies the overall quantity of epochs for the model training process.": "Especifica a quantidade total de epochs para o processo de treinamento do modelo.",
+ "Pretrained": "Pré-treino",
+ "Save Only Latest": "Salvar Apenas o Mais Recente",
+ "Enabling this setting will result in the G and D files saving only their most recent versions, effectively conserving storage space.": "Ativar esta configuração fará com que os arquivos G e D salvem apenas suas versões mais recentes, economizando espaço de armazenamento.",
+ "Save Every Weights": "Salvar Todos os Arquivos de modelo",
+ "This setting enables you to save the weights of the model at the conclusion of each epoch.": "Esta configuração permite salvar os arquivos de modelos ao final de cada epoch.",
+ "Custom Pretrained": "Pré-treino Personalizado",
+ "Utilizing custom pretrained models can lead to superior results, as selecting the most suitable pretrained models tailored to the specific use case can significantly enhance performance.": "Utilizar modelos Pré-treino personalizados pode levar a resultados superiores, pois selecionar os modelos Pré-treino mais adequados para o caso específico pode melhorar significativamente o desempenho.",
+ "Upload Pretrained Model": "Carregar Modelo Pré-treino",
+ "Refresh Custom Pretraineds": "Atualizar Pré-treino Personalizados",
+ "Pretrained Custom Settings": "Configurações Personalizadas de Pré-treino",
+ "The file you dropped is not a valid pretrained file. Please try again.": "O arquivo que você soltou não é um arquivo Pré-treino válido. Por favor, tente novamente.",
+ "Click the refresh button to see the pretrained file in the dropdown menu.": "Clique no botão de atualizar para ver o arquivo Pré-treino no menu suspenso.",
+ "Pretrained G Path": "Caminho do Pré-treino G",
+ "Pretrained D Path": "Caminho do Pré-treino D",
+ "GPU Settings": "Configurações de GPU",
+ "Sets advanced GPU settings, recommended for users with better GPU architecture.": "Define configurações avançadas de GPU, recomendadas para usuários com melhor arquitetura de GPU.",
+ "GPU Custom Settings": "Configurações Personalizadas de GPU",
+ "GPU Number": "Número da GPU",
+ "0 to ∞ separated by -": "0 a ∞ separados por -",
+ "The GPU information will be displayed here.": "As informações da GPU serão exibidas aqui.",
+ "Specify the number of GPUs you wish to utilize for preprocess by entering them separated by hyphens (-). At the moment, using multi-gpu will not have a significant effect.": "Especifique o número de GPUs que você deseja utilizar para pré-processamento, inserindo-os separados por hífens (-). No momento, usar várias GPUs não terá um efeito significativo.",
+ "Specify the number of GPUs you wish to utilize for extracting by entering them separated by hyphens (-).": "Especifique o número de GPUs que você deseja utilizar para extração, inserindo-os separados por hífens (-).",
+ "The number of CPU cores to use in the preprocess. The default setting are your cpu cores, which is recommended for most cases.": "O número de núcleos de CPU a serem usados no pré-processamento. A configuração padrão são seus núcleos de CPU, o que é recomendado para a maioria dos casos.",
+ "The number of CPU cores to use in the extraction process. The default setting are your cpu cores, which is recommended for most cases.": "O número de núcleos de CPU a serem usados no processo de extração. A configuração padrão são seus núcleos de CPU, o que é recomendado para a maioria dos casos.",
+ "GPU Information": "Informações da GPU",
+ "Pitch Guidance": "Orientação de Tom",
+ "By employing pitch guidance, it becomes feasible to mirror the intonation of the original voice, including its pitch. This feature is particularly valuable for singing and other scenarios where preserving the original melody or pitch pattern is essential.": "Ao empregar a orientação de tom, torna-se viável espelhar a entonação da voz original, incluindo seu tom. Este recurso é particularmente valioso para canto e outros cenários onde preservar a melodia ou padrão de tom original é essencial.",
+ "Utilize pretrained models when training your own. This approach reduces training duration and enhances overall quality.": "Utilize modelos Pré-treino ao treinar o seu próprio. Esta abordagem reduz a duração do treinamento e melhora a qualidade geral.",
+ "Extract Features": "Extrair Recursos",
+ "We prioritize running the model extraction on the GPU for faster performance. If you prefer to use the CPU, simply leave the GPU field blank.": "Prioritizamos a execução da extração do modelo na GPU para um desempenho mais rápido. Se você preferir usar a CPU, simplesmente deixe o campo da GPU em branco.",
+ "We prioritize running the model preprocessing on the GPU for faster performance. If you prefer to use the CPU, simply leave the GPU field blank.": "Prioritizamos a execução do pré-processamento do modelo na GPU para um desempenho mais rápido. Se você preferir usar a CPU, simplesmente deixe o campo da GPU em branco.",
+ "Overtraining Detector": "Detector de Overtrain",
+ "Detect overtraining to prevent the model from learning the training data too well and losing the ability to generalize to new data.": "Detecte o Overtrain para evitar que o modelo aprenda os dados de treinamento muito bem e perca a capacidade de generalizar para novos dados.",
+ "Overtraining Detector Settings": "Configurações do Detector de Overtrain",
+ "Overtraining Threshold": "Limite de Overtrain",
+ "Set the maximum number of epochs you want your model to stop training if no improvement is detected.": "Defina o número máximo de épocas que você deseja que seu modelo pare de treinar se nenhuma melhoria for detectada.",
+ "Sync Graph": "Sincronizar Gráfico",
+ "Synchronize the graph of the tensorboard. Only enable this setting if you are training a new model.": "Sincronize o gráfico do tensorboard. Ative esta configuração apenas se você estiver treinando um novo modelo.",
+ "Start Training": "Iniciar Treinamento",
+ "Stop Training": "Parar Treinamento",
+ "Generate Index": "Gerar index",
+ "Export Model": "Exportar Modelo",
+ "The button 'Upload' is only for google colab: Uploads the exported files to the ApplioExported folder in your Google Drive.": "O botão 'Carregar' é apenas para google colab: Carrega os arquivos exportados para a pasta ApplioExported no seu Google Drive.",
+ "Exported Pth file": "Arquivo Pth Exportado",
+ "Exported Index file": "Arquivo de index Exportado",
+ "Select the pth file to be exported": "Selecione o arquivo pth a ser exportado",
+ "Select the index file to be exported": "Selecione o arquivo de index a ser exportado",
+ "Upload": "Carregar",
+ "Voice Model": "Modelo de Voz",
+ "Select the voice model to use for the conversion.": "Selecione o modelo de voz a ser usado para a conversão.",
+ "Index File": "Arquivo de index",
+ "Select the index file to use for the conversion.": "Selecione o arquivo de índice a ser usado para a conversão.",
+ "Refresh": "Atualizar",
+ "Unload Voice": "Descarregar Voz",
+ "Single": "Um arquivo",
+ "Upload Audio": "Carregar Áudio",
+ "Select Audio": "Selecionar Áudio",
+ "Select the audio to convert.": "Selecione o áudio a ser convertido.",
+ "Advanced Settings": "Configurações Avançadas",
+ "Clear Outputs (Deletes all audios in assets/audios)": "Limpar Saídas (Exclui todos os áudios em assets/audios)",
+ "Custom Output Path": "Caminho de Saída Personalizado",
+ "Output Path": "Caminho de Saída",
+ "The path where the output audio will be saved, by default in assets/audios/output.wav": "O caminho onde o áudio de saída será salvo, por padrão em assets/audios/output.wav",
+ "Split Audio": "Dividir Áudio",
+ "Split the audio into chunks for inference to obtain better results in some cases.": "Divida o áudio em partes para inferência para obter melhores resultados em alguns casos.",
+ "Autotune": "Autotune",
+ "Apply a soft autotune to your inferences, recommended for singing conversions.": "Aplique um autotune suave às suas inferências, recomendado para conversões de canto.",
+ "Clean Audio": "Limpar Áudio",
+ "Clean your audio output using noise detection algorithms, recommended for speaking audios.": "Limpe sua saída de áudio usando algoritmos de detecção de ruído, recomendado para áudios de fala.",
+ "Clean Strength": "Força de Limpeza",
+ "Upscale Audio": "Upscale Áudio",
+ "Upscale the audio to a higher quality, recommended for low-quality audios. (It could take longer to process the audio)": "Aprimore o áudio para uma qualidade superior, recomendado para áudios de baixa qualidade. (Pode demorar mais para processar o áudio)",
+ "Set the clean-up level to the audio you want, the more you increase it the more it will clean up, but it is possible that the audio will be more compressed.": "Defina o nível de limpeza para o áudio desejado, quanto mais você aumentar, mais ele será limpo, mas é possível que o áudio fique mais comprimido.",
+ "Formant Shifting": "Mudança de Formante",
+ "Enable formant shifting. Used for male to female and vice-versa convertions.": "Ative a mudança de formante. Usado para conversões de masculino para feminino e vice-versa.",
+ "Browse presets for formanting": "Procurar predefinições para formanting",
+ "Presets are located in /assets/formant_shift folder": "As predefinições estão localizadas na pasta /assets/formant_shift",
+ "Default value is 1.0": "O valor padrão é 1.0",
+ "Quefrency for formant shifting": "Quefrência para mudança de formante",
+ "Timbre for formant shifting": "Timbre para mudança de formante",
+ "Pitch": "Pitch",
+ "Set the pitch of the audio, the higher the value, the higher the pitch.": "Defina o Pitch do áudio, quanto maior o valor, mais alto será o Pitch.",
+ "Filter Radius": "Filter Radius",
+ "If the number is greater than or equal to three, employing median filtering on the collected tone results has the potential to decrease respiration.": "Se o número for maior ou igual a três, empregar filtragem mediana nos resultados do tom coletado tem o potencial de diminuir a respiração.",
+ "Search Feature Ratio": "Proporção de Recurso de Pesquisa",
+ "Influence exerted by the index file; a higher value corresponds to greater influence. However, opting for lower values can help mitigate artifacts present in the audio.": "Influência exercida pelo arquivo de índice; um valor mais alto corresponde a uma maior influência. No entanto, optar por valores mais baixos pode ajudar a mitigar artefatos presentes no áudio.",
+ "Volume Envelope": "Envelope de Volume",
+ "Substitute or blend with the volume envelope of the output. The closer the ratio is to 1, the more the output envelope is employed.": "Substitua ou misture com o envelope de volume da saída. Quanto mais próximo o valor estiver de 1, mais o envelope de saída será empregado.",
+ "Protect Voiceless Consonants": "Proteger Consoantes Surdas",
+ "Safeguard distinct consonants and breathing sounds to prevent electro-acoustic tearing and other artifacts. Pulling the parameter to its maximum value of 0.5 offers comprehensive protection. However, reducing this value might decrease the extent of protection while potentially mitigating the indexing effect.": "Proteja consoantes distintas e sons de respiração para evitar rasgos eletroacústicos e outros artefatos. Puxar o parâmetro para seu valor máximo de 0,5 oferece proteção abrangente. No entanto, reduzir esse valor pode diminuir a extensão da proteção, enquanto potencialmente mitiga o efeito de indexação.",
+ "Pitch extraction algorithm": "Algoritmo de extração de Pitch",
+ "Pitch extraction algorithm to use for the audio conversion. The default algorithm is rmvpe, which is recommended for most cases.": "Algoritmo de extração de Pitch a ser usado para a conversão de áudio. O algoritmo padrão é rmvpe, que é recomendado para a maioria dos casos.",
+ "Convert": "Converter",
+ "Export Audio": "Exportar Áudio",
+ "Batch": "Varios arquivos",
+ "Input Folder": "Pasta de Entrada",
+ "Select the folder containing the audios to convert.": "Selecione a pasta contendo os áudios a serem convertidos.",
+ "Enter input path": "Digite o caminho de entrada",
+ "Output Folder": "Pasta de Saída",
+ "Select the folder where the output audios will be saved.": "Selecione a pasta onde os áudios de saída serão salvos.",
+ "Enter output path": "Digite o caminho de saída",
+ "Get information about the audio": "Obter informações sobre o áudio",
+ "## Voice Blender": "## Fusão de voz",
+ "Select two voice models, set your desired blend percentage, and blend them into an entirely new voice.": "Selecione dois modelos de voz, defina a porcentagem de Mix desejada e misture-os em uma nova voz.",
+ "Voice Blender": "Fusão de voz",
+ "Drag and drop your model here": "Arraste e solte seu modelo aqui",
+ "You can also use a custom path.": "Você também pode usar um caminho personalizado.",
+ "Blend Ratio": "Taxa de Fusão",
+ "Adjusting the position more towards one side or the other will make the model more similar to the first or second.": "Ajustar a posição mais para um lado ou para o outro tornará o modelo mais semelhante ao primeiro ou ao segundo.",
+ "Fusion": "Fusão",
+ "Path to Model": "Caminho para o Modelo",
+ "Enter path to model": "Digite o caminho para o modelo",
+ "Model information to be placed": "Informações do modelo a serem colocadas",
+ "Inroduce the model information": "Introduza as informações do modelo",
+ "The information to be placed in the model (You can leave it blank or put anything).": "As informações a serem colocadas no modelo (Você pode deixar em branco ou colocar qualquer coisa).",
+ "View model information": "Ver informações do modelo",
+ "Introduce the model pth path": "Introduza o caminho do modelo pth",
+ "View": "Ver",
+ "Model extraction": "Extração de modelo",
+ "Model conversion": "Conversão de modelo",
+ "Pth file": "Arquivo Pth",
+ "Output of the pth file": "Saída do arquivo pth",
+ "Extract F0 Curve": "Extrair Curva F0",
+ "The f0 curve represents the variations in the base frequency of a voice over time, showing how pitch rises and falls.": "A curva f0 representa as variações na frequência base de uma voz ao longo do tempo, mostrando como o tom sobe e desce.",
+ "# How to Report an Issue on GitHub": "# Como Reportar um Problema no GitHub",
+ "1. Click on the 'Record Screen' button below to start recording the issue you are experiencing.": "1. Clique no botão 'Gravar Tela' abaixo para começar a gravar o problema que você está enfrentando.",
+ "2. Once you have finished recording the issue, click on the 'Stop Recording' button (the same button, but the label changes depending on whether you are actively recording or not).": "2. Uma vez que você tenha terminado de gravar o problema, clique no botão 'Parar Gravação' (o mesmo botão, mas o rótulo muda dependendo se você está gravando ativamente ou não).",
+ "3. Go to [GitHub Issues](https://github.com/IAHispano/Applio/issues) and click on the 'New Issue' button.": "3. Vá para [GitHub Issues](https://github.com/IAHispano/Applio/issues) e clique no botão 'New Issue'.",
+ "4. Complete the provided issue template, ensuring to include details as needed, and utilize the assets section to upload the recorded file from the previous step.": "4. Complete o modelo de problema fornecido, garantindo incluir detalhes conforme necessário, e utilize a seção de ativos para carregar o arquivo gravado na etapa anterior.",
+ "Record Screen": "Gravar Tela",
+ "Record": "Gravar",
+ "Stop Recording": "Parar Gravação",
+ "Introduce the model .pth path": "Introduza o caminho do modelo .pth",
+ "See Model Information": "Ver Informações do Modelo",
+ "## Download Model": "## Baixar Modelo",
+ "Model Link": "Link do Modelo",
+ "Introduce the model link": "Introduza o link do modelo",
+ "Download Model": "Baixar Modelo",
+ "## Drop files": "## Soltar arquivos",
+ "Drag your .pth file and .index file into this space. Drag one and then the other.": "Arraste seu arquivo .pth e arquivo .index para este espaço. Arraste um e depois o outro.",
+ "## Search Model": "## Buscar Modelo",
+ "Search": "Buscar",
+ "Introduce the model name to search.": "Introduza o nome do modelo para buscar.",
+ "We couldn't find models by that name.": "Não conseguimos encontrar modelos com esse nome.",
+ "## Download Pretrained Models": "## Baixar Modelos Pré-treinados",
+ "Select the pretrained model you want to download.": "Selecione o modelo pré-treino que você deseja baixar.",
+ "And select the sampling rate": "E selecione a taxa de amostragem.",
+ "TTS Voices": "Vozes TTS",
+ "TTS Speed": "Velocidade TTS",
+ "Increase or decrease TTS speed.": "Aumentar ou diminuir a velocidade do TTS.",
+ "Select the TTS voice to use for the conversion.": "Selecione a voz TTS a ser usada para a conversão.",
+ "Text to Synthesize": "Texto para Sintetizar",
+ "Enter the text to synthesize.": "Digite o texto para sintetizar.",
+ "Or you can upload a .txt file": "Ou você pode carregar um arquivo .txt",
+ "Enter text to synthesize": "Digite o texto para sintetizar",
+ "Output Path for TTS Audio": "Caminho de Saída para Áudio do TTS",
+ "Output Path for RVC Audio": "Caminho de Saída para Áudio do RVC",
+ "Enable Applio integration with Discord presence": "Ativar integração do Applio com presença no Discord",
+ "It will activate the possibility of displaying the current Applio activity in Discord.": "Isso ativará a possibilidade de exibir a atividade atual do Applio no Discord.",
+ "Enable Applio integration with applio.org/models using flask": "Ativar integração do Applio com applio.org/models usando flask",
+ "It will activate the possibility of downloading models with a click from the website.": "Isso ativará a possibilidade de baixar modelos com um clique no site.",
+ "Enable fake GPU": "Ativar GPU falsa",
+ "Training is currently unsupported due to the absence of a GPU. To activate the training tab, navigate to the settings tab and enable the 'Fake GPU' option.": "O treinamento atualmente não é suportado devido à ausência de uma GPU. Para ativar a aba de treinamento, navegue até a aba de configurações e ative a opção 'GPU Falsa'.",
+ "Activates the train tab. However, please note that this device lacks GPU capabilities, hence training is not supported. This option is only for testing purposes. (This option will restart Applio)": "Ativa a aba de treinamento. No entanto, observe que este dispositivo não possui capacidades de GPU, portanto, o treinamento não é suportado. Esta opção é apenas para fins de teste. (Esta opção reiniciará o Applio)",
+ "Theme": "Tema",
+ "Select the theme you want to use. (Requires restarting Applio)": "Selecione o tema que você deseja usar. (Requer reiniciar o Applio)",
+ "Language": "Idioma",
+ "Select the language you want to use. (Requires restarting Applio)": "Selecione o idioma que você deseja usar. (Requer reiniciar o Applio)",
+ "Precision": "Precisão",
+ "Select the precision you want to use for training and inference.": "Selecione a precisão que você deseja usar para treinamento e inferência.",
+ "Update precision": "Atualizar precisão",
+ "Plugin Installer": "Instalador de Plugin",
+ "Drag your plugin.zip to install it": "Arraste seu plugin.zip para instalá-lo",
+ "Version Checker": "Verificador de Versão",
+ "Check which version of Applio is the latest to see if you need to update.": "Verifique qual versão do Applio é a mais recente para ver se você precisa atualizar.",
+ "Check for updates": "Verificar atualizações",
+ "Post-Process": "Pós-processamento",
+ "Post-process the audio to apply effects to the output.": "Pós-processar o áudio para aplicar efeitos na saída.",
+ "Reverb": "Reverberação",
+ "Apply reverb to the audio.": "Aplicar reverberação ao áudio.",
+ "Reverb Room Size": "Tamanho da Sala de Reverberação",
+ "Set the room size of the reverb.": "Defina o tamanho da sala da reverberação.",
+ "Reverb Damping": "Amortecimento da Reverberação",
+ "Set the damping of the reverb.": "Defina o amortecimento da reverberação.",
+ "Reverb Wet Gain": "Ganho Molhado da Reverberação",
+ "Set the wet gain of the reverb.": "Defina o ganho molhado da reverberação.",
+ "Reverb Dry Gain": "Ganho Seco da Reverberação",
+ "Set the dry gain of the reverb.": "Defina o ganho seco da reverberação.",
+ "Reverb Width": "Largura da Reverberação",
+ "Set the width of the reverb.": "Defina a largura da reverberação.",
+ "Reverb Freeze Mode": "Modo de Congelamento da Reverberação",
+ "Set the freeze mode of the reverb.": "Defina o modo de congelamento da reverberação.",
+ "Pitch Shift": "Mudança de Pitch",
+ "Apply pitch shift to the audio.": "Aplicar mudança de Pitch ao áudio.",
+ "Pitch Shift Semitones": "Semitons de Mudança de Pitch",
+ "Set the pitch shift semitones.": "Defina os semitons de mudança de Pitch.",
+ "Limiter": "Limitador",
+ "Apply limiter to the audio.": "Aplicar limitador ao áudio.",
+ "Limiter Threshold dB": "Limite do Limitador em dB",
+ "Set the limiter threshold dB.": "Defina o limite do limitador em dB.",
+ "Limiter Release Time": "Tempo de Liberação do Limitador",
+ "Set the limiter release time.": "Defina o tempo de liberação do limitador.",
+ "Gain": "Ganho",
+ "Apply gain to the audio.": "Aplicar ganho ao áudio.",
+ "Gain dB": "Ganho dB",
+ "Set the gain dB.": "Defina o ganho em dB.",
+ "Distortion": "Distorção",
+ "Apply distortion to the audio.": "Aplicar distorção ao áudio.",
+ "Distortion Gain": "Ganho de Distorção",
+ "Set the distortion gain.": "Defina o ganho de distorção.",
+ "Chorus": "Chorus",
+ "Apply chorus to the audio.": "Aplicar Chorus ao áudio.",
+ "Chorus Rate Hz": "Taxa de Chorus Hz",
+ "Set the chorus rate Hz.": "Defina a taxa de Chorus em Hz.",
+ "Chorus Depth": "Profundidade do Chorus",
+ "Set the chorus depth.": "Defina a profundidade do Chorus.",
+ "Chorus Center Delay ms": "Delay Central do Chorus ms",
+ "Set the chorus center delay ms.": "Defina o Delay central do Chorus em ms.",
+ "Chorus Feedback": "Feedback do Chorus",
+ "Set the chorus feedback.": "Defina o feedback do Chorus.",
+ "Chorus Mix": "Mix do Chorus",
+ "Set the chorus mix.": "Defina a Mix do Chorus.",
+ "Bitcrush": "Bitcrush",
+ "Apply bitcrush to the audio.": "Aplicar bitcrush ao áudio.",
+ "Bitcrush Bit Depth": "Profundidade de Bits do Bitcrush",
+ "Set the bitcrush bit depth.": "Defina a profundidade de bits do bitcrush.",
+ "Clipping": "Clipping",
+ "Apply clipping to the audio.": "Aplicar clipping ao áudio.",
+ "Clipping Threshold": "Limite de Clipping",
+ "Set the clipping threshold.": "Defina o limite de clipping.",
+ "Compressor": "Compressor",
+ "Apply compressor to the audio.": "Aplicar compressor ao áudio.",
+ "Compressor Threshold dB": "Limite do Compressor em dB",
+ "Set the compressor threshold dB.": "Defina o limite do compressor em dB.",
+ "Compressor Ratio": "Taxa do Compressor",
+ "Set the compressor ratio.": "Defina a taxa do compressor.",
+ "Compressor Attack ms": "Ataque do Compressor ms",
+ "Set the compressor attack ms.": "Defina o ataque do compressor em ms.",
+ "Compressor Release ms": "Liberação do Compressor ms",
+ "Set the compressor release ms.": "Defina a liberação do compressor em ms.",
+ "Delay": "Delay",
+ "Apply delay to the audio.": "Aplicar Delay ao áudio.",
+ "Delay Seconds": "Segundos de Delay",
+ "Set the delay seconds.": "Defina os segundos de Delay.",
+ "Delay Feedback": "Feedback do Delay",
+ "Set the delay feedback.": "Defina o feedback do Delay.",
+ "Delay Mix": "Mix do Delay",
+ "Set the delay mix.": "Defina a Mix do Delay.",
+ "Custom Embedder": "Embedder Personalizado",
+ "Select Custom Embedder": "Selecionar Embedder Personalizado",
+ "Refresh embedders": "Atualizar Embedderes",
+ "Folder Name": "Nome da Pasta",
+ "Upload .bin": "Carregar .bin",
+ "Upload .json": "Carregar .json",
+ "Move files to custom embedder folder": "Mover arquivos para a pasta de Embedder personalizado",
+ "model information": "informações do modelo",
+ "Model Creator": "Criador do Modelo",
+ "Name of the model creator. (Default: Unknown)": "Nome do criador do modelo. (Padrão: Unknown)"
+}
diff --git a/assets/i18n/languages/pt_PT.json b/assets/i18n/languages/pt_PT.json
new file mode 100644
index 0000000000000000000000000000000000000000..52cd7b8a38c66c3378ce98aebad927984d8d012d
--- /dev/null
+++ b/assets/i18n/languages/pt_PT.json
@@ -0,0 +1,175 @@
+{
+ "VITS-based Voice Conversion focused on simplicity, quality and performance.": "A melhor ferramenta de clonagem de voz, meticulosamente otimizada para potência, modularidade e experiência de fácil utilização incomparáveis.",
+ "This section contains some extra utilities that often may be in experimental phases.": "Esta seção contém alguns utilitários extras que muitas vezes podem estar em fases experimentais.",
+ "Output Information": "Informações de saída",
+ "The output information will be displayed here.": "As informações de saída serão exibidas aqui.",
+ "Inference": "Inferência",
+ "Train": "Trem",
+ "Extra": "Extra",
+ "Merge Audios": "Mesclar áudios",
+ "Processing": "Processamento",
+ "Audio Analyzer": "Analisador de áudio",
+ "Model Information": "Informações do modelo",
+ "Plugins": "Plug-ins",
+ "Download": "Baixar",
+ "Report a Bug": "Relatar um bug",
+ "Settings": "Configurações",
+ "Preprocess": "Pré-processar",
+ "Model Name": "Nome do modelo",
+ "Name of the new model.": "Nome do novo modelo.",
+ "Enter model name": "Digite o nome do modelo",
+ "Dataset Path": "Caminho do conjunto de dados",
+ "Path to the dataset folder.": "Caminho para a pasta do conjunto de dados.",
+ "Refresh Datasets": "Atualizar conjuntos de dados",
+ "Dataset Creator": "Criador de conjunto de dados",
+ "Dataset Name": "Nome do conjunto de dados",
+ "Name of the new dataset.": "Nome do novo conjunto de dados.",
+ "Enter dataset name": "Insira o nome do conjunto de dados",
+ "Upload Audio Dataset": "Carregar conjunto de dados de áudio",
+ "The audio file has been successfully added to the dataset. Please click the preprocess button.": "O arquivo de áudio foi adicionado com êxito ao conjunto de dados. Clique no botão de pré-processo.",
+ "Enter dataset path": "Inserir caminho do conjunto de dados",
+ "Sampling Rate": "Taxa de amostragem",
+ "The sampling rate of the audio files.": "A taxa de amostragem dos arquivos de áudio.",
+ "Model Architecture": "Versão RVC",
+ "Version of the model architecture.": "A versão RVC do modelo.",
+ "Preprocess Dataset": "Pré-processar conjunto de dados",
+ "Extract": "Extrair",
+ "Hop Length": "Comprimento do salto",
+ "Denotes the duration it takes for the system to transition to a significant pitch change. Smaller hop lengths require more time for inference but tend to yield higher pitch accuracy.": "Denota a duração necessária para que o sistema faça a transição para uma mudança de tom significativa. Comprimentos de salto menores requerem mais tempo para inferência, mas tendem a produzir maior precisão de pitch.",
+ "Batch Size": "Tamanho do lote",
+ "It's advisable to align it with the available VRAM of your GPU. A setting of 4 offers improved accuracy but slower processing, while 8 provides faster and standard results.": "É aconselhável alinhá-lo com a VRAM disponível da sua GPU. Uma configuração de 4 oferece precisão aprimorada, mas processamento mais lento, enquanto 8 fornece resultados mais rápidos e padrão.",
+ "Save Every Epoch": "Salve todas as épocas",
+ "Determine at how many epochs the model will saved at.": "Determine em quantas épocas o modelo será salvo.",
+ "Total Epoch": "Época Total",
+ "Specifies the overall quantity of epochs for the model training process.": "Especifica a quantidade total de épocas para o processo de treinamento do modelo.",
+ "Pretrained": "Pré-treinado",
+ "Save Only Latest": "Salvar somente as últimas",
+ "Enabling this setting will result in the G and D files saving only their most recent versions, effectively conserving storage space.": "Habilitar essa configuração resultará nos arquivos G e D salvando apenas suas versões mais recentes, efetivamente conservando espaço de armazenamento.",
+ "Save Every Weights": "Economize todos os pesos",
+ "This setting enables you to save the weights of the model at the conclusion of each epoch.": "Essa configuração permite que você salve os pesos do modelo na conclusão de cada época.",
+ "Custom Pretrained": "Pré-treinado personalizado",
+ "Utilizing custom pretrained models can lead to superior results, as selecting the most suitable pretrained models tailored to the specific use case can significantly enhance performance.": "A utilização de modelos pré-treinados personalizados pode levar a resultados superiores, pois a seleção dos modelos pré-treinados mais adequados adaptados ao caso de uso específico pode melhorar significativamente o desempenho.",
+ "Upload Pretrained Model": "Carregar modelo pré-treinado",
+ "Refresh Custom Pretraineds": "Atualizar pré-treinados personalizados",
+ "Pretrained Custom Settings": "Configurações personalizadas pré-treinadas",
+ "The file you dropped is not a valid pretrained file. Please try again.": "O arquivo descartado não é um arquivo pré-treinado válido. Tente novamente.",
+ "Click the refresh button to see the pretrained file in the dropdown menu.": "Clique no botão Atualizar para ver o arquivo pré-treinado no menu suspenso.",
+ "Pretrained G Path": "G pré-treinado personalizado",
+ "Pretrained D Path": "D pré-treinado personalizado",
+ "GPU Settings": "Configurações da GPU",
+ "Sets advanced GPU settings, recommended for users with better GPU architecture.": "Define configurações avançadas de GPU, recomendadas para usuários com melhor arquitetura de GPU.",
+ "GPU Custom Settings": "Configurações personalizadas da GPU",
+ "GPU Number": "Número da GPU",
+ "0 to ∞ separated by -": "0 a ∞ separados por -",
+ "GPU Information": "Informações da GPU",
+ "Pitch Guidance": "Orientação de Pitch",
+ "By employing pitch guidance, it becomes feasible to mirror the intonation of the original voice, including its pitch. This feature is particularly valuable for singing and other scenarios where preserving the original melody or pitch pattern is essential.": "Ao empregar a orientação de pitch, torna-se viável espelhar a entonação da voz original, incluindo seu pitch. Esta característica é particularmente valiosa para o canto e outros cenários onde preservar a melodia original ou padrão de tom é essencial.",
+ "Utilize pretrained models when training your own. This approach reduces training duration and enhances overall quality.": "Utilize modelos pré-treinados ao treinar seus próprios. Essa abordagem reduz a duração do treinamento e melhora a qualidade geral.",
+ "Extract Features": "Recursos de extração",
+ "Start Training": "Comece a Treinar",
+ "Generate Index": "Gerar índice",
+ "Voice Model": "Modelo de Voz",
+ "Select the voice model to use for the conversion.": "Selecione o modelo de voz a ser usado para a conversão.",
+ "Index File": "Arquivo de índice",
+ "Select the index file to use for the conversion.": "Selecione o arquivo de índice a ser usado para a conversão.",
+ "Refresh": "Atualizar",
+ "Unload Voice": "Descarregar voz",
+ "Single": "Único",
+ "Upload Audio": "Carregar áudio",
+ "Select Audio": "Selecione Áudio",
+ "Select the audio to convert.": "Selecione o áudio a ser convertido.",
+ "Advanced Settings": "Configurações avançadas",
+ "Clear Outputs (Deletes all audios in assets/audios)": "Limpar saídas (exclui todos os áudios em ativos/áudios)",
+ "Custom Output Path": "Caminho de saída personalizado",
+ "Output Path": "Caminho de saída",
+ "The path where the output audio will be saved, by default in assets/audios/output.wav": "O caminho onde o áudio de saída será salvo, por padrão em ativos/áudios/output.wav",
+ "Split Audio": "Áudio dividido",
+ "Split the audio into chunks for inference to obtain better results in some cases.": "Divida o áudio em pedaços para inferência para obter melhores resultados em alguns casos.",
+ "Autotune": "Autotune",
+ "Apply a soft autotune to your inferences, recommended for singing conversions.": "Aplique um autotune suave às suas inferências, recomendado para conversões de canto.",
+ "Clean Audio": "Áudio limpo",
+ "Clean your audio output using noise detection algorithms, recommended for speaking audios.": "Limpe sua saída de áudio usando algoritmos de detecção de ruído, recomendados para falar áudios.",
+ "Clean Strength": "Força Limpa",
+ "Set the clean-up level to the audio you want, the more you increase it the more it will clean up, but it is possible that the audio will be more compressed.": "Defina o nível de limpeza para o áudio desejado, quanto mais você aumentá-lo, mais ele será limpo, mas é possível que o áudio seja mais compactado.",
+ "Pitch": "Campo",
+ "Set the pitch of the audio, the higher the value, the higher the pitch.": "Defina o tom do áudio, quanto maior o valor, maior o pitch.",
+ "Filter Radius": "Raio do filtro",
+ "If the number is greater than or equal to three, employing median filtering on the collected tone results has the potential to decrease respiration.": "Se o número for maior ou igual a três, empregar a filtragem mediana nos resultados de tom coletados tem o potencial de diminuir a respiração.",
+ "Search Feature Ratio": "Taxa de recursos de pesquisa",
+ "Influence exerted by the index file; a higher value corresponds to greater influence. However, opting for lower values can help mitigate artifacts present in the audio.": "Influência exercida pelo arquivo de índice; quanto maior o valor corresponde maior a influência. No entanto, optar por valores mais baixos pode ajudar a mitigar artefatos presentes no áudio.",
+ "Volume Envelope": "Volume Envelope",
+ "Substitute or blend with the volume envelope of the output. The closer the ratio is to 1, the more the output envelope is employed.": "Substitua ou misture com o envelope de volume da saída. Quanto mais próxima a proporção estiver de 1, mais o envelope de saída será empregado.",
+ "Protect Voiceless Consonants": "Proteja as consoantes surdas",
+ "Safeguard distinct consonants and breathing sounds to prevent electro-acoustic tearing and other artifacts. Pulling the parameter to its maximum value of 0.5 offers comprehensive protection. However, reducing this value might decrease the extent of protection while potentially mitigating the indexing effect.": "Proteja consoantes distintas e sons respiratórios para evitar rasgos eletroacústicos e outros artefatos. Puxar o parâmetro para seu valor máximo de 0,5 oferece proteção abrangente. No entanto, a redução desse valor pode diminuir a extensão da proteção e, ao mesmo tempo, potencialmente atenuar o efeito de indexação.",
+ "Pitch extraction algorithm": "Algoritmo de extração de pitch",
+ "Pitch extraction algorithm to use for the audio conversion. The default algorithm is rmvpe, which is recommended for most cases.": "Algoritmo de extração de pitch para usar na conversão de áudio. O algoritmo padrão é rmvpe, que é recomendado para a maioria dos casos.",
+ "Convert": "Converter",
+ "Export Audio": "Exportar áudio",
+ "Batch": "Lote",
+ "Input Folder": "Pasta de entrada",
+ "Select the folder containing the audios to convert.": "Selecione a pasta que contém os áudios a serem convertidos.",
+ "Enter input path": "Insira o caminho de entrada",
+ "Output Folder": "Pasta de saída",
+ "Select the folder where the output audios will be saved.": "Selecione a pasta onde os áudios de saída serão salvos.",
+ "Enter output path": "Insira o caminho de saída",
+ "Get information about the audio": "Obter informações sobre o áudio",
+ "Information about the audio file": "Informações sobre o arquivo de áudio",
+ "Waiting for information...": "Aguardando informações...",
+ "## Voice Blender": "## Liquidificador de Voz",
+ "Select two voice models, set your desired blend percentage, and blend them into an entirely new voice.": "Selecione dois modelos de voz, defina a porcentagem de mistura desejada e misture-os em uma voz totalmente nova.",
+ "Voice Blender": "Liquidificador de voz",
+ "Drag and drop your model here": "Arraste e solte seu modelo aqui",
+ "You can also use a custom path.": "Você também pode usar um caminho personalizado.",
+ "Blend Ratio": "Proporção de mistura",
+ "Adjusting the position more towards one side or the other will make the model more similar to the first or second.": "Ajustar a posição mais para um lado ou para o outro tornará o modelo mais semelhante ao primeiro ou ao segundo.",
+ "Fusion": "Fusão",
+ "Path to Model": "Caminho para o modelo",
+ "Enter path to model": "Insira o caminho para o modelo",
+ "Model information to be placed": "Informações do modelo a ser colocado",
+ "Inroduce the model information": "Inroduce as informações do modelo",
+ "The information to be placed in the model (You can leave it blank or put anything).": "As informações a serem colocadas no modelo (Você pode deixá-lo em branco ou colocar qualquer coisa).",
+ "View model information": "Exibir informações do modelo",
+ "Introduce the model pth path": "Apresentar o caminho pth do modelo",
+ "View": "Vista",
+ "Model extraction": "Extração do modelo",
+ "Model conversion": "Conversão de modelos",
+ "Pth file": "Arquivo Pth",
+ "Output of the pth file": "Saída do arquivo pth",
+ "# How to Report an Issue on GitHub": "# Como relatar um problema no GitHub",
+ "1. Click on the 'Record Screen' button below to start recording the issue you are experiencing.": "1. Clique no botão 'Gravar tela' abaixo para começar a gravar o problema que você está enfrentando.",
+ "2. Once you have finished recording the issue, click on the 'Stop Recording' button (the same button, but the label changes depending on whether you are actively recording or not).": "2. Depois de terminar de gravar o problema, clique no botão 'Parar gravação' (o mesmo botão, mas a etiqueta muda dependendo se você está gravando ativamente ou não).",
+ "3. Go to [GitHub Issues](https://github.com/IAHispano/Applio/issues) and click on the 'New Issue' button.": "3. Vá para [Problemas do GitHub](https://github.com/IAHispano/Applio/issues) e clique no botão 'Novo problema'.",
+ "4. Complete the provided issue template, ensuring to include details as needed, and utilize the assets section to upload the recorded file from the previous step.": "4. Preencha o modelo de problema fornecido, garantindo incluir detalhes conforme necessário, e utilize a seção de ativos para carregar o arquivo gravado da etapa anterior.",
+ "Record Screen": "Tela de gravação",
+ "Record": "Registro",
+ "Stop Recording": "Parar gravação",
+ "Introduce the model .pth path": "Apresentar o caminho .pth do modelo",
+ "See Model Information": "Ver informações do modelo",
+ "## Download Model": "## Baixar Modelo",
+ "Model Link": "Link do modelo",
+ "Introduce the model link": "Apresentar o link do modelo",
+ "Download Model": "Download do Modelo",
+ "## Drop files": "## Soltar arquivos",
+ "Drag your .pth file and .index file into this space. Drag one and then the other.": "Arraste o arquivo .pth e o arquivo .index para este espaço. Arraste um e depois o outro.",
+ "TTS Voices": "Vozes TTS",
+ "Select the TTS voice to use for the conversion.": "Selecione a voz TTS a ser usada para a conversão.",
+ "Text to Synthesize": "Texto para sintetizar",
+ "Enter the text to synthesize.": "Digite o texto a ser sintetizado.",
+ "Or you can upload a .txt file": "Ou você pode carregar um arquivo .txt",
+ "Enter text to synthesize": "Digite o texto para sintetizar",
+ "Output Path for TTS Audio": "Caminho de saída para áudio TTS",
+ "Output Path for RVC Audio": "Caminho de saída para áudio RVC",
+ "Enable Applio integration with Discord presence": "Habilitar a integração do Applio com a presença do Discord",
+ "It will activate the possibility of displaying the current Applio activity in Discord.": "Ele ativará a possibilidade de exibir a atividade atual do Applio no Discord.",
+ "Enable Applio integration with applio.org/models using flask": "Habilitar a integração do Applio com applio.org/models usando frasco",
+ "It will activate the possibility of downloading models with a click from the website.": "Ele ativará a possibilidade de baixar modelos com um clique no site.",
+ "Theme": "Tema",
+ "Select the theme you want to use. (Requires restarting Applio)": "Selecione o tema que deseja usar. (Requer reiniciar o Applio)",
+ "Language": "Idioma",
+ "Select the language you want to use. (Requires restarting Applio)": "Selecione o idioma que deseja usar. (Requer reiniciar o Applio)",
+ "Plugin Installer": "Instalador de Plug-ins",
+ "Drag your plugin.zip to install it": "Arraste o plugin.zip para instalá-lo",
+ "Version Checker": "Verificador de versão",
+ "Check which version of Applio is the latest to see if you need to update.": "Verifique qual versão do Applio é a mais recente para ver se você precisa atualizar.",
+ "Check for updates": "Verificar se há atualizações"
+}
\ No newline at end of file
diff --git a/assets/i18n/languages/ro_RO.json b/assets/i18n/languages/ro_RO.json
new file mode 100644
index 0000000000000000000000000000000000000000..990d46f34bc39c723b2535fb4de40ce58dd98b59
--- /dev/null
+++ b/assets/i18n/languages/ro_RO.json
@@ -0,0 +1,175 @@
+{
+ "VITS-based Voice Conversion focused on simplicity, quality and performance.": "Cel mai bun instrument de clonare a vocii, optimizat meticulos pentru putere, modularitate și experiență ușor de utilizat.",
+ "This section contains some extra utilities that often may be in experimental phases.": "Această secțiune conține câteva utilități suplimentare care pot fi adesea în faze experimentale.",
+ "Output Information": "Informații despre ieșire",
+ "The output information will be displayed here.": "Informațiile de ieșire vor fi afișate aici.",
+ "Inference": "Deducţie",
+ "Train": "Tren",
+ "Extra": "Superfluu",
+ "Merge Audios": "Îmbinare audio",
+ "Processing": "Prelucrare",
+ "Audio Analyzer": "Analizor audio",
+ "Model Information": "Informații despre model",
+ "Plugins": "Plugin-uri",
+ "Download": "Descărca",
+ "Report a Bug": "Raportați o eroare",
+ "Settings": "Setări",
+ "Preprocess": "Preproces",
+ "Model Name": "Numele modelului",
+ "Name of the new model.": "Numele noului model.",
+ "Enter model name": "Introduceți numele modelului",
+ "Dataset Path": "Calea setului de date",
+ "Path to the dataset folder.": "Calea către folderul setului de date.",
+ "Refresh Datasets": "Reîmprospătarea seturilor de date",
+ "Dataset Creator": "Creator de seturi de date",
+ "Dataset Name": "Numele setului de date",
+ "Name of the new dataset.": "Numele noului set de date.",
+ "Enter dataset name": "Introduceți numele setului de date",
+ "Upload Audio Dataset": "Încărcați setul de date audio",
+ "The audio file has been successfully added to the dataset. Please click the preprocess button.": "Fișierul audio a fost adăugat cu succes la setul de date. Vă rugăm să faceți clic pe butonul de preprocesare.",
+ "Enter dataset path": "Introduceți calea setului de date",
+ "Sampling Rate": "Rata de eșantionare",
+ "The sampling rate of the audio files.": "Rata de eșantionare a fișierelor audio.",
+ "Model Architecture": "Versiunea RVC",
+ "Version of the model architecture.": "Versiunea RVC a modelului.",
+ "Preprocess Dataset": "Set de date preproces",
+ "Extract": "Extract",
+ "Hop Length": "Lungimea hameiului",
+ "Denotes the duration it takes for the system to transition to a significant pitch change. Smaller hop lengths require more time for inference but tend to yield higher pitch accuracy.": "Denotă durata necesară pentru ca sistemul să treacă la o schimbare semnificativă a înălțimii. Lungimile mai mici ale hameiului necesită mai mult timp pentru inferență, dar tind să producă o precizie mai mare a pasului.",
+ "Batch Size": "Mărimea lotului",
+ "It's advisable to align it with the available VRAM of your GPU. A setting of 4 offers improved accuracy but slower processing, while 8 provides faster and standard results.": "Este recomandabil să îl aliniați cu VRAM disponibil al GPU-ului. O setare de 4 oferă o precizie îmbunătățită, dar o procesare mai lentă, în timp ce 8 oferă rezultate mai rapide și standard.",
+ "Save Every Epoch": "Salvați fiecare epocă",
+ "Determine at how many epochs the model will saved at.": "Determinați la câte epoci va fi salvat modelul.",
+ "Total Epoch": "Epoca totală",
+ "Specifies the overall quantity of epochs for the model training process.": "Specifică numărul total de epoci pentru procesul de instruire a modelului.",
+ "Pretrained": "Preinstruit",
+ "Save Only Latest": "Salvați numai cele mai recente",
+ "Enabling this setting will result in the G and D files saving only their most recent versions, effectively conserving storage space.": "Activarea acestei setări va avea ca rezultat salvarea fișierelor G și D numai a celor mai recente versiuni, economisind în mod eficient spațiul de stocare.",
+ "Save Every Weights": "Economisiți fiecare greutate",
+ "This setting enables you to save the weights of the model at the conclusion of each epoch.": "Această setare vă permite să economisiți greutățile modelului la sfârșitul fiecărei epoci.",
+ "Custom Pretrained": "Personalizat Pretrained",
+ "Utilizing custom pretrained models can lead to superior results, as selecting the most suitable pretrained models tailored to the specific use case can significantly enhance performance.": "Utilizarea modelelor personalizate pre-antrenate poate duce la rezultate superioare, deoarece selectarea celor mai potrivite modele pre-antrenate adaptate cazului de utilizare specific poate îmbunătăți semnificativ performanța.",
+ "Upload Pretrained Model": "Încărcați modelul preinstruit",
+ "Refresh Custom Pretraineds": "Reîmprospătați preinstruiții personalizați",
+ "Pretrained Custom Settings": "Setări personalizate pre-instruite",
+ "The file you dropped is not a valid pretrained file. Please try again.": "Fișierul pe care l-ați scăpat nu este un fișier preinstruit valid. Vă rugăm să încercați din nou.",
+ "Click the refresh button to see the pretrained file in the dropdown menu.": "Faceți clic pe butonul de reîmprospătare pentru a vedea fișierul preantrenat în meniul derulant.",
+ "Pretrained G Path": "G personalizat preantrenat",
+ "Pretrained D Path": "Personalizat Pretrained D",
+ "GPU Settings": "Setări GPU",
+ "Sets advanced GPU settings, recommended for users with better GPU architecture.": "Setează setări GPU avansate, recomandate utilizatorilor cu o arhitectură GPU mai bună.",
+ "GPU Custom Settings": "Setări personalizate GPU",
+ "GPU Number": "Număr GPU",
+ "0 to ∞ separated by -": "0 până la ∞ separate de -",
+ "GPU Information": "Informații GPU",
+ "Pitch Guidance": "Pitch Guidance",
+ "By employing pitch guidance, it becomes feasible to mirror the intonation of the original voice, including its pitch. This feature is particularly valuable for singing and other scenarios where preserving the original melody or pitch pattern is essential.": "Prin utilizarea ghidării tonului, devine fezabilă oglindirea intonației vocii originale, inclusiv a înălțimii acesteia. Această caracteristică este deosebit de valoroasă pentru cântat și alte scenarii în care păstrarea melodiei originale sau a modelului de înălțime este esențială.",
+ "Utilize pretrained models when training your own. This approach reduces training duration and enhances overall quality.": "Utilizați modele pre-antrenate atunci când vă antrenați propriul. Această abordare reduce durata antrenamentului și îmbunătățește calitatea generală.",
+ "Extract Features": "Extrageți caracteristicile",
+ "Start Training": "Începeți instruirea",
+ "Generate Index": "Generare index",
+ "Voice Model": "Model vocal",
+ "Select the voice model to use for the conversion.": "Selectați modelul vocal de utilizat pentru conversie.",
+ "Index File": "Fișier index",
+ "Select the index file to use for the conversion.": "Selectați fișierul index de utilizat pentru conversie.",
+ "Refresh": "Împrospăta",
+ "Unload Voice": "Descărcare voce",
+ "Single": "Singur",
+ "Upload Audio": "Încărcare audio",
+ "Select Audio": "Selectați Audio",
+ "Select the audio to convert.": "Selectați sunetul de convertit.",
+ "Advanced Settings": "Setări avansate",
+ "Clear Outputs (Deletes all audios in assets/audios)": "Ștergeți ieșirile (Șterge toate audio-urile din active / audio)",
+ "Custom Output Path": "Cale de ieșire personalizată",
+ "Output Path": "Cale de ieșire",
+ "The path where the output audio will be saved, by default in assets/audios/output.wav": "Calea în care va fi salvat sunetul de ieșire, în mod implicit în active / audio / output.wav",
+ "Split Audio": "Împărțire audio",
+ "Split the audio into chunks for inference to obtain better results in some cases.": "Împărțiți sunetul în bucăți pentru inferență pentru a obține rezultate mai bune în unele cazuri.",
+ "Autotune": "Reglare automată",
+ "Apply a soft autotune to your inferences, recommended for singing conversions.": "Aplicați o reglare automată ușoară la inferențele dvs., recomandată pentru conversiile de cântat.",
+ "Clean Audio": "Sunet curat",
+ "Clean your audio output using noise detection algorithms, recommended for speaking audios.": "Curățați ieșirea audio utilizând algoritmi de detectare a zgomotului, recomandați pentru enunțarea sunetului.",
+ "Clean Strength": "Rezistență curată",
+ "Set the clean-up level to the audio you want, the more you increase it the more it will clean up, but it is possible that the audio will be more compressed.": "Setați nivelul de curățare la sunetul dorit, cu cât îl măriți mai mult, cu atât se va curăța mai mult, dar este posibil ca sunetul să fie mai comprimat.",
+ "Pitch": "Smoală",
+ "Set the pitch of the audio, the higher the value, the higher the pitch.": "Setați înălțimea sunetului, cu cât este mai mare valoarea, cu atât este mai mare înălțimea.",
+ "Filter Radius": "Raza filtrului",
+ "If the number is greater than or equal to three, employing median filtering on the collected tone results has the potential to decrease respiration.": "Dacă numărul este mai mare sau egal cu trei, utilizarea filtrării mediane pe rezultatele tonului colectat are potențialul de a scădea respirația.",
+ "Search Feature Ratio": "Raportul caracteristicilor de căutare",
+ "Influence exerted by the index file; a higher value corresponds to greater influence. However, opting for lower values can help mitigate artifacts present in the audio.": "Influența exercitată de fișierul index; O valoare mai mare corespunde unei influențe mai mari. Cu toate acestea, optarea pentru valori mai mici poate ajuta la atenuarea artefactelor prezente în audio.",
+ "Volume Envelope": "Plic de volum",
+ "Substitute or blend with the volume envelope of the output. The closer the ratio is to 1, the more the output envelope is employed.": "Înlocuiți sau amestecați cu plicul de volum al ieșirii. Cu cât raportul este mai apropiat de 1, cu atât este folosit mai mult plicul de ieșire.",
+ "Protect Voiceless Consonants": "Protejați consoanele fără voce",
+ "Safeguard distinct consonants and breathing sounds to prevent electro-acoustic tearing and other artifacts. Pulling the parameter to its maximum value of 0.5 offers comprehensive protection. However, reducing this value might decrease the extent of protection while potentially mitigating the indexing effect.": "Protejați consoanele distincte și sunetele de respirație pentru a preveni ruperea electro-acustică și alte artefacte. Tragerea parametrului la valoarea maximă de 0,5 oferă o protecție completă. Cu toate acestea, reducerea acestei valori ar putea reduce gradul de protecție, atenuând în același timp efectul de indexare.",
+ "Pitch extraction algorithm": "Algoritm de extracție a pitch-ului",
+ "Pitch extraction algorithm to use for the audio conversion. The default algorithm is rmvpe, which is recommended for most cases.": "Algoritm de extragere a pitch-ului de utilizat pentru conversia audio. Algoritmul implicit este rmvpe, care este recomandat în majoritatea cazurilor.",
+ "Convert": "Converti",
+ "Export Audio": "Exportați audio",
+ "Batch": "Lot",
+ "Input Folder": "Folder de intrare",
+ "Select the folder containing the audios to convert.": "Selectați folderul care conține audio-urile de convertit.",
+ "Enter input path": "Introduceți calea de intrare",
+ "Output Folder": "Dosar de ieșire",
+ "Select the folder where the output audios will be saved.": "Selectați folderul în care vor fi salvate audio-urile de ieșire.",
+ "Enter output path": "Introduceți calea de ieșire",
+ "Get information about the audio": "Obțineți informații despre audio",
+ "Information about the audio file": "Informații despre fișierul audio",
+ "Waiting for information...": "În așteptarea informațiilor...",
+ "## Voice Blender": "## Blender de voce",
+ "Select two voice models, set your desired blend percentage, and blend them into an entirely new voice.": "Selectați două modele de voce, setați procentajul de amestec dorit și amestecați-le într-o voce complet nouă.",
+ "Voice Blender": "Blender de voce",
+ "Drag and drop your model here": "Glisați și fixați modelul aici",
+ "You can also use a custom path.": "De asemenea, puteți utiliza un traseu personalizat.",
+ "Blend Ratio": "Raport de amestecare",
+ "Adjusting the position more towards one side or the other will make the model more similar to the first or second.": "Ajustarea poziției mai mult spre o parte sau alta va face modelul mai asemănător cu primul sau al doilea.",
+ "Fusion": "Fuziune",
+ "Path to Model": "Calea către model",
+ "Enter path to model": "Introduceți calea către model",
+ "Model information to be placed": "Informații despre model care trebuie plasate",
+ "Inroduce the model information": "Inroduceți informațiile despre model",
+ "The information to be placed in the model (You can leave it blank or put anything).": "Informațiile care trebuie plasate în model (Puteți să-l lăsați necompletat sau să puneți orice).",
+ "View model information": "Vizualizarea informațiilor despre model",
+ "Introduce the model pth path": "Introduceți calea pth a modelului",
+ "View": "Vedere",
+ "Model extraction": "Extragerea modelului",
+ "Model conversion": "Conversia modelului",
+ "Pth file": "Fișier Pth",
+ "Output of the pth file": "Ieșirea fișierului pth",
+ "# How to Report an Issue on GitHub": "# Cum să raportați o problemă pe GitHub",
+ "1. Click on the 'Record Screen' button below to start recording the issue you are experiencing.": "1. Faceți clic pe butonul \"Ecran de înregistrare\" de mai jos pentru a începe înregistrarea problemei pe care o întâmpinați.",
+ "2. Once you have finished recording the issue, click on the 'Stop Recording' button (the same button, but the label changes depending on whether you are actively recording or not).": "2. După ce ați terminat de înregistrat problema, faceți clic pe butonul \"Opriți înregistrarea\" (același buton, dar eticheta se schimbă în funcție de înregistrarea activă sau nu).",
+ "3. Go to [GitHub Issues](https://github.com/IAHispano/Applio/issues) and click on the 'New Issue' button.": "3. Accesați [Probleme GitHub] (https://github.com/IAHispano/Applio/issues) și faceți clic pe butonul \"Problemă nouă\".",
+ "4. Complete the provided issue template, ensuring to include details as needed, and utilize the assets section to upload the recorded file from the previous step.": "4. Completați șablonul de problemă furnizat, asigurându-vă că includeți detalii după cum este necesar și utilizați secțiunea active pentru a încărca fișierul înregistrat din pasul anterior.",
+ "Record Screen": "Ecran de înregistrare",
+ "Record": "Disc",
+ "Stop Recording": "Opriți înregistrarea",
+ "Introduce the model .pth path": "Introducerea căii .pth a modelului",
+ "See Model Information": "Consultați informațiile despre model",
+ "## Download Model": "## Descărcați modelul",
+ "Model Link": "Model Link",
+ "Introduce the model link": "Introduceți linkul modelului",
+ "Download Model": "Descărcați modelul",
+ "## Drop files": "## Aruncați fișiere",
+ "Drag your .pth file and .index file into this space. Drag one and then the other.": "Glisați fișierul .pth și fișierul .index în acest spațiu. Trageți unul și apoi celălalt.",
+ "TTS Voices": "Voci TTS",
+ "Select the TTS voice to use for the conversion.": "Selectați vocea TTS de utilizat pentru conversie.",
+ "Text to Synthesize": "Text pentru sintetizare",
+ "Enter the text to synthesize.": "Introduceți textul pentru a sintetiza.",
+ "Or you can upload a .txt file": "Sau puteți încărca un fișier .txt",
+ "Enter text to synthesize": "Introduceți text pentru sintetizare",
+ "Output Path for TTS Audio": "Cale de ieșire pentru TTS Audio",
+ "Output Path for RVC Audio": "Cale de ieșire pentru RVC Audio",
+ "Enable Applio integration with Discord presence": "Activați integrarea Applio cu prezența Discord",
+ "It will activate the possibility of displaying the current Applio activity in Discord.": "Acesta va activa posibilitatea afișării activității curente Applio în Discord.",
+ "Enable Applio integration with applio.org/models using flask": "Activați integrarea Applio cu applio.org/models folosind balonul",
+ "It will activate the possibility of downloading models with a click from the website.": "Acesta va activa posibilitatea de a descărca modele cu un clic de pe site.",
+ "Theme": "Temă",
+ "Select the theme you want to use. (Requires restarting Applio)": "Selectați tema pe care doriți să o utilizați. (Necesită repornirea Applio)",
+ "Language": "Limbă",
+ "Select the language you want to use. (Requires restarting Applio)": "Selectați limba pe care doriți să o utilizați. (Necesită repornirea Applio)",
+ "Plugin Installer": "Instalator de plugin-uri",
+ "Drag your plugin.zip to install it": "Trageți plugin.zip pentru a-l instala",
+ "Version Checker": "Verificator de versiuni",
+ "Check which version of Applio is the latest to see if you need to update.": "Verificați ce versiune de Applio este cea mai recentă pentru a vedea dacă trebuie să actualizați.",
+ "Check for updates": "Căutați actualizări"
+}
\ No newline at end of file
diff --git a/assets/i18n/languages/ru_RU.json b/assets/i18n/languages/ru_RU.json
new file mode 100644
index 0000000000000000000000000000000000000000..ba4122330c48c70091590dc358dce42668dbf1da
--- /dev/null
+++ b/assets/i18n/languages/ru_RU.json
@@ -0,0 +1,175 @@
+{
+ "VITS-based Voice Conversion focused on simplicity, quality and performance.": "Конвертация голоса на основе VITS, ориентированная на простоту, качество и производительность.",
+ "This section contains some extra utilities that often may be in experimental phases.": "Этот раздел содержит дополнительные утилиты, которые часто могут находиться на экспериментальных стадиях.",
+ "Output Information": "Выходная информация",
+ "The output information will be displayed here.": "Здесь будет отображена выходная информация.",
+ "Inference": "Вывод",
+ "Train": "Обучение",
+ "Extra": "Дополнительно",
+ "Merge Audios": "Слияние аудио",
+ "Processing": "Обработка",
+ "Audio Analyzer": "Анализатор аудио",
+ "Model Information": "Информация о модели",
+ "Plugins": "Плагины",
+ "Download": "Скачать",
+ "Report a Bug": "Сообщить об ошибке",
+ "Settings": "Настройки",
+ "Preprocess": "Предварительная обработка",
+ "Model Name": "Название модели",
+ "Name of the new model.": "Название новой модели.",
+ "Enter model name": "Введите название модели",
+ "Dataset Path": "Путь к набору данных",
+ "Path to the dataset folder.": "Путь к папке с набором данных.",
+ "Refresh Datasets": "Обновить наборы данных",
+ "Dataset Creator": "Создатель набора данных",
+ "Dataset Name": "Название набора данных",
+ "Name of the new dataset.": "Название нового набора данных.",
+ "Enter dataset name": "Введите название набора данных",
+ "Upload Audio Dataset": "Загрузить набор аудиоданных",
+ "The audio file has been successfully added to the dataset. Please click the preprocess button.": "Аудиофайл успешно добавлен в набор данных. Пожалуйста, нажмите кнопку предварительной обработки.",
+ "Enter dataset path": "Введите путь к набору данных",
+ "Sampling Rate": "Частота дискретизации",
+ "The sampling rate of the audio files.": "Частота дискретизации аудиофайлов.",
+ "Model Architecture": "Версия RVC",
+ "Version of the model architecture.": "Версия RVC модели.",
+ "Preprocess Dataset": "Предварительная обработка набора данных",
+ "Extract": "Извлечь",
+ "Hop Length": "Длина скачка",
+ "Denotes the duration it takes for the system to transition to a significant pitch change. Smaller hop lengths require more time for inference but tend to yield higher pitch accuracy.": "Обозначает время, необходимое системе для перехода к значительному изменению высоты тона. Меньшая длина скачка требует больше времени для вывода, но обычно обеспечивает более высокую точность определения высоты тона.",
+ "Batch Size": "Размер пакета",
+ "It's advisable to align it with the available VRAM of your GPU. A setting of 4 offers improved accuracy but slower processing, while 8 provides faster and standard results.": "Рекомендуется выровнять его с доступной видеопамятью вашего графического процессора. Значение 4 обеспечивает повышенную точность, но более медленную обработку, тогда как 8 обеспечивает более быстрые и стандартные результаты.",
+ "Save Every Epoch": "Сохранять каждую эпоху",
+ "Determine at how many epochs the model will saved at.": "Определите, на скольких эпохах будет сохраняться модель.",
+ "Total Epoch": "Общее количество эпох",
+ "Specifies the overall quantity of epochs for the model training process.": "Задает общее количество эпох для процесса обучения модели.",
+ "Pretrained": "Предварительно обученные модели",
+ "Save Only Latest": "Сохранять только последние версии",
+ "Enabling this setting will result in the G and D files saving only their most recent versions, effectively conserving storage space.": "Включение этой настройки приведет к тому, что файлы G и D будут сохранять только свои самые последние версии, что позволит эффективно экономить место на диске.",
+ "Save Every Weights": "Сохранять все веса",
+ "This setting enables you to save the weights of the model at the conclusion of each epoch.": "Эта настройка позволяет сохранять весовые коэффициенты модели в конце каждой эпохи.",
+ "Custom Pretrained": "Пользовательские предварительно обученные модели",
+ "Utilizing custom pretrained models can lead to superior results, as selecting the most suitable pretrained models tailored to the specific use case can significantly enhance performance.": "Использование пользовательских предварительно обученных моделей может привести к превосходным результатам, так как выбор наиболее подходящих предварительно обученных моделей, адаптированных к конкретному сценарию использования, может значительно повысить производительность.",
+ "Upload Pretrained Model": "Загрузить предварительно обученную модель",
+ "Refresh Custom Pretraineds": "Обновить пользовательские предварительно обученные модели",
+ "Pretrained Custom Settings": "Пользовательские настройки предварительно обученных моделей",
+ "The file you dropped is not a valid pretrained file. Please try again.": "Файл, который вы загрузили, не является допустимым предварительно обученным файлом. Пожалуйста, попробуйте снова.",
+ "Click the refresh button to see the pretrained file in the dropdown menu.": "Нажмите кнопку обновления, чтобы увидеть предварительно обученный файл в выпадающем меню.",
+ "Pretrained G Path": "Путь к пользовательскому предварительно обученному G",
+ "Pretrained D Path": "Путь к пользовательскому предварительно обученному D",
+ "GPU Settings": "Настройки GPU",
+ "Sets advanced GPU settings, recommended for users with better GPU architecture.": "Устанавливает расширенные настройки GPU, рекомендуемые для пользователей с улучшенной архитектурой GPU.",
+ "GPU Custom Settings": "Пользовательские настройки GPU",
+ "GPU Number": "Номер GPU",
+ "0 to ∞ separated by -": "от 0 до ∞, разделенные -",
+ "GPU Information": "Информация о GPU",
+ "Pitch Guidance": "Руководство по высоте тона",
+ "By employing pitch guidance, it becomes feasible to mirror the intonation of the original voice, including its pitch. This feature is particularly valuable for singing and other scenarios where preserving the original melody or pitch pattern is essential.": "Используя управление высотой тона, становится возможным отразить интонацию исходного голоса, включая его высоту. Эта функция особенно ценна для пения и других сценариев, где важно сохранить оригинальную мелодию или тональность.",
+ "Utilize pretrained models when training your own. This approach reduces training duration and enhances overall quality.": "Используйте предварительно обученные модели при обучении своих собственных. Такой подход сокращает продолжительность обучения и повышает общее качество.",
+ "Extract Features": "Извлечь функции",
+ "Start Training": "Начать обучение",
+ "Generate Index": "Сгенерировать индекс",
+ "Voice Model": "Голосовая модель",
+ "Select the voice model to use for the conversion.": "Выберите голосовую модель, которая будет использоваться для преобразования.",
+ "Index File": "Индексный файл",
+ "Select the index file to use for the conversion.": "Выберите индексный файл, который будет использоваться для преобразования.",
+ "Refresh": "Обновить",
+ "Unload Voice": "Выгрузить голос",
+ "Single": "Одиночный",
+ "Upload Audio": "Загрузить аудио",
+ "Select Audio": "Выберите аудио",
+ "Select the audio to convert.": "Выберите аудио для преобразования.",
+ "Advanced Settings": "Расширенные настройки",
+ "Clear Outputs (Deletes all audios in assets/audios)": "Очистить выходные данные (удаляет все аудио в assets/audios)",
+ "Custom Output Path": "Пользовательский выходной путь",
+ "Output Path": "Выходной путь",
+ "The path where the output audio will be saved, by default in assets/audios/output.wav": "Путь, по которому будет сохранен выходной звук, по умолчанию в assets/audios/output.wav",
+ "Split Audio": "Разделить аудио",
+ "Split the audio into chunks for inference to obtain better results in some cases.": "Разделите аудио на фрагменты для вывода, чтобы получить лучшие результаты в некоторых случаях.",
+ "Autotune": "Автотюн",
+ "Apply a soft autotune to your inferences, recommended for singing conversions.": "Примените мягкую автонастройку к вашим выводам, рекомендуемую для певческих преобразований.",
+ "Clean Audio": "Очистить аудио",
+ "Clean your audio output using noise detection algorithms, recommended for speaking audios.": "Очистите аудиовыход с помощью алгоритмов обнаружения шума, рекомендуемых для разговорных аудио.",
+ "Clean Strength": "Сила очистки",
+ "Set the clean-up level to the audio you want, the more you increase it the more it will clean up, but it is possible that the audio will be more compressed.": "Установите желаемый уровень очистки звука, чем больше вы его увеличите, тем больше он будет очищаться, но возможно, что звук будет более сжатым.",
+ "Pitch": "Высота тона",
+ "Set the pitch of the audio, the higher the value, the higher the pitch.": "Установите высоту звука, чем выше значение, тем выше высота тона.",
+ "Filter Radius": "Радиус фильтра",
+ "If the number is greater than or equal to three, employing median filtering on the collected tone results has the potential to decrease respiration.": "Если это число больше или равно трем, использование медианной фильтрации по собранным результатам тона может привести к снижению дыхания.",
+ "Search Feature Ratio": "Соотношение объектов поиска",
+ "Influence exerted by the index file; a higher value corresponds to greater influence. However, opting for lower values can help mitigate artifacts present in the audio.": "Влияние, оказываемое индексным файлом; Чем выше значение, тем больше влияние. Однако выбор более низких значений может помочь смягчить артефакты, присутствующие в аудио.",
+ "Volume Envelope": "Огибающая громкости",
+ "Substitute or blend with the volume envelope of the output. The closer the ratio is to 1, the more the output envelope is employed.": "Замените или смешайте с огибающей громкости выхода. Чем ближе отношение к 1, тем больше используется выходная огибающая.",
+ "Protect Voiceless Consonants": "Защита глухих согласных",
+ "Safeguard distinct consonants and breathing sounds to prevent electro-acoustic tearing and other artifacts. Pulling the parameter to its maximum value of 0.5 offers comprehensive protection. However, reducing this value might decrease the extent of protection while potentially mitigating the indexing effect.": "Защитите отчетливые согласные и звуки дыхания, чтобы предотвратить электроакустические разрывы и другие артефакты. Извлечение параметра до максимального значения 0,5 обеспечивает комплексную защиту. Однако уменьшение этого значения может снизить степень защиты, потенциально смягчив эффект индексирования.",
+ "Pitch extraction algorithm": "Алгоритм извлечения высоты тона",
+ "Pitch extraction algorithm to use for the audio conversion. The default algorithm is rmvpe, which is recommended for most cases.": "Алгоритм извлечения высоты тона, используемый для преобразования звука. По умолчанию используется алгоритм rmvpe, который рекомендуется для большинства случаев.",
+ "Convert": "Преобразовать",
+ "Export Audio": "Экспортировать аудио",
+ "Batch": "Пакет",
+ "Input Folder": "Входная папка",
+ "Select the folder containing the audios to convert.": "Выберите папку, содержащую аудиофайлы для преобразования.",
+ "Enter input path": "Введите путь ввода",
+ "Output Folder": "Выходная папка",
+ "Select the folder where the output audios will be saved.": "Выберите папку, в которой будут сохранены выходные аудиозаписи.",
+ "Enter output path": "Введите выходной путь",
+ "Get information about the audio": "Получить информацию об аудио",
+ "Information about the audio file": "Информация об аудиофайле",
+ "Waiting for information...": "Ожидание информации...",
+ "## Voice Blender": "## Голосовой блендер",
+ "Select two voice models, set your desired blend percentage, and blend them into an entirely new voice.": "Выберите две модели голоса, установите желаемый процент смешивания и смешайте их в совершенно новый голос.",
+ "Voice Blender": "Голосовой блендер",
+ "Drag and drop your model here": "Перетащите сюда свою модель",
+ "You can also use a custom path.": "Вы также можете использовать пользовательский путь.",
+ "Blend Ratio": "Соотношение смешивания",
+ "Adjusting the position more towards one side or the other will make the model more similar to the first or second.": "Изменение положения в ту или иную сторону сделает модель более похожей на первую или вторую.",
+ "Fusion": "Слияние",
+ "Path to Model": "Путь к модели",
+ "Enter path to model": "Введите путь к модели",
+ "Model information to be placed": "Информация о модели, которая будет размещена",
+ "Inroduce the model information": "Ввод информации о модели",
+ "The information to be placed in the model (You can leave it blank or put anything).": "Информация, которая будет размещена в модели (Вы можете оставить ее пустой или поставить что угодно).",
+ "View model information": "Просмотр информации о модели",
+ "Introduce the model pth path": "Введите путь к pth модели",
+ "View": "Просмотр",
+ "Model extraction": "Извлечение модели",
+ "Model conversion": "Преобразование модели",
+ "Pth file": "Pth файл",
+ "Output of the pth file": "Вывод pth файла",
+ "# How to Report an Issue on GitHub": "# Как сообщить о проблеме на GitHub",
+ "1. Click on the 'Record Screen' button below to start recording the issue you are experiencing.": "1. Нажмите кнопку «Записать экран» ниже, чтобы начать запись проблемы, с которой вы столкнулись.",
+ "2. Once you have finished recording the issue, click on the 'Stop Recording' button (the same button, but the label changes depending on whether you are actively recording or not).": "2. После того, как вы закончили запись задачи, нажмите кнопку «Остановить запись» (та же кнопка, но метка меняется в зависимости от того, ведете ли вы активную запись или нет).",
+ "3. Go to [GitHub Issues](https://github.com/IAHispano/Applio/issues) and click on the 'New Issue' button.": "3. Перейдите в [GitHub Issues](https://github.com/IAHispano/Applio/issues) и нажмите кнопку «Новая проблема».",
+ "4. Complete the provided issue template, ensuring to include details as needed, and utilize the assets section to upload the recorded file from the previous step.": "4. Заполните предоставленный шаблон задачи, не забудьте включить необходимые сведения и используйте раздел ресурсов для загрузки записанного файла с предыдущего шага.",
+ "Record Screen": "Запись экрана",
+ "Record": "Запись",
+ "Stop Recording": "Остановить запись",
+ "Introduce the model .pth path": "Введите путь к .pth модели",
+ "See Model Information": "Посмотреть информацию о модели",
+ "## Download Model": "## Скачать модель",
+ "Model Link": "Ссылка на модель",
+ "Introduce the model link": "Введите ссылку на модель",
+ "Download Model": "Скачать модель",
+ "## Drop files": "## Удаление файлов",
+ "Drag your .pth file and .index file into this space. Drag one and then the other.": "Перетащите файлы .pth и .index в это пространство. Перетащите один, а затем другой.",
+ "TTS Voices": "Голоса TTS",
+ "Select the TTS voice to use for the conversion.": "Выберите голос TTS, который будет использоваться для преобразования.",
+ "Text to Synthesize": "Текст для синтеза",
+ "Enter the text to synthesize.": "Введите текст для синтеза.",
+ "Or you can upload a .txt file": "Или вы можете загрузить .txt файл",
+ "Enter text to synthesize": "Введите текст для синтеза",
+ "Output Path for TTS Audio": "Выходной путь для TTS-аудио",
+ "Output Path for RVC Audio": "Выходной путь для RVC-аудио",
+ "Enable Applio integration with Discord presence": "Включите интеграцию Applio с присутствием в Discord",
+ "It will activate the possibility of displaying the current Applio activity in Discord.": "Это активирует возможность отображения текущей активности Applio в Discord.",
+ "Enable Applio integration with applio.org/models using flask": "Включите интеграцию Applio с applio.org/models с помощью flask",
+ "It will activate the possibility of downloading models with a click from the website.": "Он активирует возможность скачивания моделей одним кликом с сайта.",
+ "Theme": "Тема",
+ "Select the theme you want to use. (Requires restarting Applio)": "Выберите тему, которую хотите использовать. (Требуется перезапуск Applio)",
+ "Language": "Язык",
+ "Select the language you want to use. (Requires restarting Applio)": "Выберите язык, который вы хотите использовать. (Требуется перезапуск Applio)",
+ "Plugin Installer": "Установщик плагинов",
+ "Drag your plugin.zip to install it": "Перетащите plugin.zip, чтобы установить его",
+ "Version Checker": "Проверка версий",
+ "Check which version of Applio is the latest to see if you need to update.": "Проверьте, какая версия Applio является последней, чтобы узнать, нужно ли вам обновление.",
+ "Check for updates": "Проверка наличия обновлений"
+}
diff --git a/assets/i18n/languages/ta-IN.json b/assets/i18n/languages/ta-IN.json
new file mode 100644
index 0000000000000000000000000000000000000000..f73e7496966057c549b01b0546af5194672c1a7b
--- /dev/null
+++ b/assets/i18n/languages/ta-IN.json
@@ -0,0 +1,204 @@
+{
+ "VITS-based Voice Conversion focused on simplicity, quality and performance.": "முழுமையான குரல் குளோனிங் கருவி, அநாகரமாக ஒருமிக்க, பகுக்காதது, பயனர் உருவாக்கத்திற்கு உயரியது.\n[தமிழ் மொழிபெயர்ப்பு: Enes](https://discord.com/users/1140031358006202468)",
+ "This section contains some extra utilities that often may be in experimental phases.": "இந்த பிரிவில் சேர்ந்துள்ள கொடுமை கருவிகளில் சார்ந்த பல கூட்டுத்தரங்களைக் கொண்டுள்ளது.",
+ "Output Information": "வெளியீடு தகவல்",
+ "The output information will be displayed here.": "வெளியீடு தகவல் இங்கே காட்டப்படும்.",
+ "Inference": "கருத்து",
+ "Train": "பயிற்சி",
+ "Extra": "கூடுதல்",
+ "Merge Audios": "ஒரேபோனில் ஒன்றாக்குக",
+ "Processing": "செயலாக்கம்",
+ "Audio Analyzer": "ஆடியோ பகுப்பாய்வாளர்",
+ "Model Information": "மாதிரி தகவல்",
+ "Plugins": "பிளகின்கள்",
+ "Download": "பதிவிறக்கம்",
+ "Report a Bug": "பிழை அறிக்கை",
+ "Settings": "அமைப்புகள்",
+ "Preprocess": "முன்பாகவும்",
+ "Model Name": "மாதிரி பெயர்",
+ "Name of the new model.": "புதிய மாதிரி பெயர்.",
+ "Enter model name": "மாதிரி பெயரை உள்ளிடவும்",
+ "Dataset Path": "தரவுத்தொகுதி பாதை",
+ "Path to the dataset folder.": "தரவுத்தொகுதி கோப்புக்கு பாதை.",
+ "Refresh Datasets": "தரவுத்தொகுதிகளை புதுப்பிக்கவும்",
+ "Dataset Creator": "தரவுத்தொகுதி உருவாக்கி",
+ "Dataset Name": "தரவுத்தொகுதி பெயர்",
+ "Name of the new dataset.": "புதிய தரவுத்தொகுதி பெயர்.",
+ "Enter dataset name": "தரவுத்தொகுதி பெயரை உள்ளிடவும்",
+ "Upload Audio Dataset": "ஆடியோ தரவுத்தொகுதியை பதிவேற்றவும்",
+ "The audio file has been successfully added to the dataset. Please click the preprocess button.": "ஆடியோ கோப்பு தரவுத்தொகுதிக்கு வெற்றிகரமாக சேர்க்கப்பட்டுள்ளது. தயவுசெய்து முன்னிருப்பை அழுத்தவும்.",
+ "Enter dataset path": "தரவுத்தொகுதி பாதையை உள்ளிடவும்",
+ "Sampling Rate": "மாதிரி விகிதம்",
+ "The sampling rate of the audio files.": "ஆடியோ கோப்புகளின் மாதிரி விகிதம்.",
+ "Model Architecture": "RVC பதிப்பு",
+ "Version of the model architecture.": "மாதிரி RVC பதிப்பு.",
+ "Preprocess Dataset": "முன்பாகவும் தரவுத்தொகுதி",
+
+ "Embedder Model": "உள்ளீடு மாதிரி",
+ "Model used for learning speaker embedding.": "பேச்சாளர் உள்ளீட்டை கற்க பயன்படுத்தப்படும் மாதிரி.",
+ "Extract": "எக்ஸ்ட்ராக்ட்",
+ "Hop Length": "ஹாப் நீளம்",
+ "Denotes the duration it takes for the system to transition to a significant pitch change. Smaller hop lengths require more time for inference but tend to yield higher pitch accuracy.": "கருத்துக்கு எவ்வளவு நேரம் எடுத்துக் கொள்கிறது என்றால், அது ஒரு முக்கிய பிச் மாற்றத்திற்கு அமைந்துகொள்கின்றது. சிறிய ஹாப் நீளங்களுக்கு அதிக நேரம் தேவைப்படுகின்றது ஆனால் அவை உயரமான பிச் சரிசெய்தியை உருவாக்க உதவுகின்றன.",
+ "Batch Size": "பேட்ச் அளவு",
+ "It's advisable to align it with the available VRAM of your GPU. A setting of 4 offers improved accuracy but slower processing, while 8 provides faster and standard results.": "உங்கள் GPU கிடைக்கும் கிடைச்சதை அவசியமாக உள்ளிட பரிந்திருக்கின்றது. 4 என்ற அமைப்பு உயர்த்தப்பட்ட உறுதியுள்ள சொல்லத்தைக் கொண்டுள்ளது ஆனால் அதிக நேரம் பயன்படுகின்றது, 8 அமைப்பு விரைவாக மற்றும் நிலைக்குப் பொருத்தப்படுகிறது.",
+ "Save Every Epoch": "ஒவ்வொரு காலமும் சேமிக்கவும்",
+ "Determine at how many epochs the model will saved at.": "மாதிரி எதிர்காலங்களில் எத்தனை படிப்புகளில் மாதிரியைச் சேமிக்க விரும்புகிறீர்கள்.",
+ "Total Epoch": "மொத்த எபக்",
+ "Specifies the overall quantity of epochs for the model training process.": "மாதிரி பயிற்சி செய்திகளின் மொத்த அளவை குறிப்பிடுகிறது.",
+ "Pretrained": "பூர்வதயாரிக",
+ "Save Only Latest": "கடைசியே சேமிக்கவும்",
+ "Enabling this setting will result in the G and D files saving only their most recent versions, effectively conserving storage space.": "இந்த அமைப்பை இயக்கும் போது G மற்றும் D கோப்புகள் உங்கள் கடைசி பதிப்புகளைச் சேமிக்கும், வாயிலாக சேமிக்கப்படுகின்றன.",
+ "Save Every Weights": "ஒவ்வொரு எடைக்கும் சேமிக்கவும்",
+ "This setting enables you to save the weights of the model at the conclusion of each epoch.": "இந்த அமைப்பு உங்கள் மாதிரி பயிற்சி செய்தியின் முடிவில் மாதிரிகளை சேமிக்க அனுமதிக்கின்றது.",
+ "Custom Pretrained": "கஸ்டம் பூர்வதயாரிக",
+ "Utilizing custom pretrained models can lead to superior results, as selecting the most suitable pretrained models tailored to the specific use case can significantly enhance performance.": "கஸ்டம் பூர்வதயாரிக மாதிரிகளை பயன்படுத்துவது சிறந்த விளக்கங்களை தரலாம், குறிப்பிடுகின்ற குழப்பத்திற்கு ஏற்றதும் பூர்வதயாரிக மாதிரிகளைத் தேர்ந்தெடுக்க உடனே அந்தக் குழப்பத்தை அபூர்வமாக செயல்படுத்தலாம்.",
+ "Upload Pretrained Model": "பூர்வதயாரிக மாதிரி மோடெலை பதிவேற்றவும்",
+ "Refresh Custom Pretraineds": "கஸ்டம் பூர்வதயாரிகளை புதுப்பிக்கவும்",
+ "Pretrained Custom Settings": "கஸ்டம் பூர்வதயாரிக அமைப்புகள்",
+ "The file you dropped is not a valid pretrained file. Please try again.": "நீங்கள் பொருத்தவில்லை என்றால் பூர்வதயாரிக கோப்பு அல்ல. மீண்டும் முயற்சிக்கவும்.",
+ "Click the refresh button to see the pretrained file in the dropdown menu.": "கீழேயுள்ள பட்டி பட்டியில் பூர்வதயாரிக கோப்புக்கு உருவாக்க முயலுங்கள்.",
+ "Pretrained G Path": "கஸ்டம் பூர்வதயாரிக G பாதை",
+ "Pretrained D Path": "கஸ்டம் பூர்வதயாரிக D பாதை",
+ "GPU Settings": "GPU அமைப்புகள்",
+ "Sets advanced GPU settings, recommended for users with better GPU architecture.": "மேலும் முதிர்ச்சியான GPU அமைப்புகளை அமைக்கின்றது, உடனடியான GPU கருவிக்கு பரிந்திரமான பயனாளர்களுக்கு பரிந்துரிக்கப்படுகிறது.",
+ "GPU Custom Settings": "GPU கஸ்டம் அமைப்புகள்",
+ "GPU Number": "GPU எண்",
+ "0 to ∞ separated by -": "0 இரு ∞ பிரிவாக - வாக்கப்பட்டு",
+ "GPU Information": "GPU தகவல்",
+ "Pitch Guidance": "பிச் வழிநிரப்பல்",
+ "By employing pitch guidance, it becomes feasible to mirror the intonation of the original voice, including its pitch. This feature is particularly valuable for singing and other scenarios where preserving the original melody or pitch pattern is essential.": "பிச் வழிநிரப்பல் மூலம், மூல குரலின் ஒலிக்கோட்டைக் கண்டுகொள்வது சாத்தியமாகின்றது, அதன் பிச்சை கூட. இந்த அம்சம் குரல் பாடலுக்கும் மற்றும் உலாவிகளுக்கும் மூல இசை அல்லது பிச் முதிரையைக் காப்பாற்ற எளியதாக இருக்கும்.",
+ "Utilize pretrained models when training your own. This approach reduces training duration and enhances overall quality.": "உங்கள் பயிற்சியில் உங்கள் தனிப்பட்ட மாதிரிகளை பயன்படுத்துவது பூர்வதயாரிக மாதிரிகளை பயன்படுத்துவது குரல் பயிற்சி காலத்தை குறைக்கின்றது மற்றும் மொத்த தரவின் உயர்த்துத்தை அதிகரிக்கின்றது.",
+ "Extract Features": "அம்சங்கள் எடு",
+ "Overtraining Detector": "அதிகமாக பயிற்சிப்படுத்தும் அறிவுப்பால்",
+ "Detect overtraining to prevent the model from learning the training data too well and losing the ability to generalize to new data.": "பயிற்சிப்படுத்தும் தரவை மிகவும் நன்றாக அறியும் பாதுகாப்பு மூலம் மாதிரி பயிற்சிப்படுத்துதலை தடுக்க, புதிய தரவுக்கு பொதுவாக பொருத்தமாக மாற்ற அனுமதியை இழக்குகிறது.",
+ "Overtraining Detector Settings": "அதிக பயிற்சிப்படுத்தும் அறிவுப்பால் அமைப்புகள்",
+ "Overtraining Threshold": "அதிக பயிற்சிப்படுத்தும் அறிவுப்பால் அதிகம்",
+ "Set the maximum number of epochs you want your model to stop training if no improvement is detected.": "அதிகமாக பயிற்சிப்படுத்தும் தரவு அறியப்படாதால் உங்கள் மாதிரியின் பயிற்சிப்படுத்தும் மொத்த எண்ணிக்கையை அமைக்கவும்.",
+
+ "Start Training": "பயிற்சி ஆரம்பிக்கவும்",
+ "Stop Training & Restart Applio": "பயிற்சி நிறுத்து & புதுப்பிக்க Applio",
+ "Generate Index": "சுருக்கம் உருவாக்கு",
+
+ "Export Model": "ஏற்றுமதி மாதிரி",
+ "The button 'Upload' is only for google colab: Uploads the exported files to the ApplioExported folder in your Google Drive.": "'பதிவேற்று' பொத்தான்கள் உள்ளீட்டிற்கு மட்டுமே கூகுள் கோலாப் சேமிப்பகத்திற்கு கடைசியாக கூகுள் டிரைவில் உங்கள் ApplioExported கோப்புக்கு ஏற்றுமதிக்கும்.",
+ "Exported Pth file": "ஏற்றுமதிக்கப்பட்ட Pth கோப்பு",
+ "Exported Index file": "ஏற்றுமதிக்கப்பட்ட சுட்டி கோப்பு",
+ "Select the pth file to be exported": "ஏற்றுமதிக்க வேண்டிய pth கோப்பைத் தேர்ந்தெடுக்கவும்",
+ "Select the index file to be exported": "ஏற்றுமதிக்க வேண்டிய சுட்டி கோப்பைத் தேர்ந்தெடுக்கவும்",
+ "Upload": "பதிவேற்று",
+
+ "Voice Model": "குரல் மாதிரி",
+ "Select the voice model to use for the conversion.": "மாற்றத்திற்கு பயன்படுத்த விரும்பும் குரல் மாதிரியை தேர்ந்தெடுக்கவும்.",
+ "Index File": "சுருக்க கோப்பு",
+ "Select the index file to use for the conversion.": "மாற்றத்திற்கு பயன்படுத்த உள்ள சுருக்க கோப்பை தேர்ந்தெடுக்கவும்.",
+ "Refresh": "புதுப்பிக்கவும்",
+ "Unload Voice": "குரல் அமைதி",
+ "Single": "ஒற்றை",
+ "Upload Audio": "ஒலியை பதிவேற்று",
+ "Select Audio": "ஒலியைத் தேர்ந்தெடு",
+ "Select the audio to convert.": "மாற்றுவதற்கு ஒலியைத் தேர்ந்தெடு.",
+ "Advanced Settings": "மேம்பாடு அமைப்புகள்",
+ "Clear Outputs (Deletes all audios in assets/audios)": "வெற்றிகளை அழித்தல் (assets/audios உள்ள அனைத்து ஒலிகளையும் நீக்கும்)",
+ "Custom Output Path": "கஸ்டம் வெற்றிப் பாதை",
+ "Output Path": "வெற்றி பாதை",
+ "The path where the output audio will be saved, by default in assets/audios/output.wav": "வெற்றிகள் உள்ளிடப்பட்ட ஒலியைச் சேமிக்கப்படும் பாதை, பொதுவாக assets/audios/output.wav இல்.",
+ "Split Audio": "ஒலியை பிரித்தல்",
+ "Split the audio into chunks for inference to obtain better results in some cases.": "கொலுசுகளாக ஒலியை பிரிக்க, சில நிலைகளில் சிறப்பு விளக்கங்களைப் பெற விரும்புகின்றது.",
+ "Autotune": "ஆட்டோடியூன்",
+ "Apply a soft autotune to your inferences, recommended for singing conversions.": "உங்கள் முன்னோடிகளுக்கு ஒரு மென்னுரை ஆட்டோடியூனை பயன்படுத்தவும், பாடல் மாற்றங்களுக்கு பரிந்துரிக்கப்படுகின்றது.",
+ "Clean Audio": "சுத்தமான ஒலி",
+ "Clean your audio output using noise detection algorithms, recommended for speaking audios.": "உங்கள் ஒலி வெற்றியை சுத்தமாக்க, பேசும் ஒலிகளுக்கு பரிந்துரிக்கப்படுகின்றது.",
+ "Clean Strength": "சுத்த வலிமை",
+ "Set the clean-up level to the audio you want, the more you increase it the more it will clean up, but it is possible that the audio will be more compressed.": "நீங்கள் விரும்பும் ஒலிக்கு சுத்தமாக்க விளக்கு, அதை அதிகரிக்கும்போது அது அதிகரிக்கும், ஆனால் ஒலி குறுகியாக இருக்கலாம்.",
+ "Pitch": "பிச்",
+ "Set the pitch of the audio, the higher the value, the higher the pitch.": "ஒலியின் பிச் அமைக்கவும், மதிப்பு உயரானதும் அதிகமாகும்.",
+ "Filter Radius": "குழப்பத்தின் அருகு",
+ "If the number is greater than or equal to three, employing median filtering on the collected tone results has the potential to decrease respiration.": "எண் மூலம் மூன்று அல்லது அதனை விட அதிகமாக இருந்தால், சேகரித்த இசை முடிவுகளில் இயலுமை குறைவாகும் என்று சொல்லப்படுகின்றது.",
+ "Search Feature Ratio": "தேடல் அம்ச விகிதம்",
+ "Influence exerted by the index file; a higher value corresponds to greater influence. However, opting for lower values can help mitigate artifacts present in the audio.": "இடைவரிசு கோப்பின் மூலம் உள்ள பாதியான ஒருவரிடத்திற்கு உருவாகும் அந்தக் கோப்பு; அதிக மதிப்பு அதிக உருவாகும் என்று அர்த்தம். எனவே, குறோக்கின் குறைந்த மதிப்புகளைத் தேர்வுசெய்வதால் ஒலியில் உள்ள கலப்புகளைத் தவிர்க்க உதவலாம்.",
+ "Volume Envelope": "அளவு என்வெலோப்",
+ "Substitute or blend with the volume envelope of the output. The closer the ratio is to 1, the more the output envelope is employed.": "வெற்றியின் அளவு என்வெலோப் இல் மாறியது அல்லது இணைந்தது. விளக்கு அந்த விகிதம் 1 க்கு அழைத்திருந்தால், வெற்றியின் அளவு என்வெலோப் பயன்படும்.",
+ "Protect Voiceless Consonants": "குரலின் இல்லாத ஸ்வரக்களைக் காப்பாற்றவும்",
+ "Safeguard distinct consonants and breathing sounds to prevent electro-acoustic tearing and other artifacts. Pulling the parameter to its maximum value of 0.5 offers comprehensive protection. However, reducing this value might decrease the extent of protection while potentially mitigating the indexing effect.": "எலக்ட்ரோ-ஒலி கொழுகு மற்றும் பிற கலப்புகளை தடுக்குவதற்கு விரிவான ஸ்வரக்களுக்கு மற்றும் சுவாசத் தானங்களுக்கு பாதுகாக்க. இந்த அளவுக்கு அதிகமாக 0.5 க்கு அழைத்துக் கொள்வது பொருத்தமான பாதுகாப்பை வழங்குகின்றது. ஆனால், இந்த மதிப்பை குறைந்ததாக்கின்றார் என்றால், பாதுகாப்புக்குரிய நிலை குறைந்துவிடப்படலாம் மற்றும் அதுவே இந்தக் குறோக்குனை பரிந்துரிக்கும் என்று எச்சரிக்கை தரகின்றது.",
+ "Pitch extraction algorithm": "பிச் எக்ஸ்டிரக்ஷன் அளவுத் தொகுப்பு",
+ "Pitch extraction algorithm to use for the audio conversion. The default algorithm is rmvpe, which is recommended for most cases.": "ஒலி மாற்றத்திற்கு பயன்படுத்த வேண்டிய பிச் எக்ஸ்டிரக்ஷன் அளவுத் தொகுப்பு. இயல்பான அளவுத் தொகுப்பு rmvpe ஆகும், இது அதிகமாக பரிந்துரிக்கப்படுகின்றது.",
+
+ "Convert": "மாற்று",
+ "Export Audio": "ஒலியை ஏற்றுமதி செய்",
+
+ "Batch": "பேட்ச்",
+ "Input Folder": "உள்ளிடும் கோப்பு",
+ "Select the folder containing the audios to convert.": "மாற்ற ஒலிகளைக் கொண்ட கோப்புகளைக் கொண்ட கோப்புக்கு தேர்ந்தெடு.",
+ "Enter input path": "உள்ளிடும் பாதையை உள்ளிடுக",
+ "Output Folder": "வெற்றி கோப்பு",
+ "Select the folder where the output audios will be saved.": "வெற்றிகளைச் சேமிக்கப்படும் கோப்புக்கு தேர்ந்தெடு.",
+ "Enter output path": "வெற்றியின் பாதையை உள்ளிடுக",
+
+ "Get information about the audio": "ஒலியை பற்றிய தகவல் பெறுக",
+
+ "## Voice Blender": "## குரல் பிளெண்டர்",
+ "Select two voice models, set your desired blend percentage, and blend them into an entirely new voice.": "இரண்டு குரல் மாதிரிகளைத் தேர்வு செய்து, விரும்பிய குரல் சதவீதம் அமைக்கவும், அவைகளை முழுமையாக ஒரு புதிய குரலாக பிளெண்டுகின்றன.",
+ "Voice Blender": "குரல் பிளெண்டர்",
+ "Drag and drop your model here": "உங்கள் மாதிரி இங்கே எழுதவும்",
+ "You can also use a custom path.": "நீங்கள் கஸ்டம் பாதையையும் பயன்படுத்தலாம்.",
+ "Blend Ratio": "குரல் சதவீதம்",
+ "Adjusting the position more towards one side or the other will make the model more similar to the first or second.": "ஒரு பக்கத்திற்கு அல்லது மற்றும் மற்றும் அதிகமாக செய்யும் உள்ளீடு இரட்டிப் பார்த்துக் கொள்ளுதல் மாதிரியாகின்றது.",
+ "Fusion": "ஐக்கியம்",
+ "Path to Model": "மாதிரிக்கு பாதை",
+ "Enter path to model": "மாதிரிக்கு பாதையை உள்ளிடுக",
+ "Model information to be placed": "இருந்து விடப்பட வேண்டிய மாதிரி தகவல்",
+ "Introduce the model information": "மாதிரி தகவல் அறிமுகம்",
+ "The information to be placed in the model (You can leave it blank or put anything).": "மாதிரிக்கு வைக்கப்பட வேண்டிய தகவல் (நீங்கள் அந்தச் செயலை விட அந்தச் செய்யாமல் அனைத்ததையும் வைக்கலாம்).",
+ "View model information": "மாதிரி தகவலைக் காண்க",
+ "Introduce the model pth path": "மாதிரி pth பாதையை உள்ளிடுக",
+ "View": "காண்க",
+ "Model extraction": "மாதிரி எடுத்தல்",
+ "Model conversion": "மாதிரி மாற்றம்",
+ "Pth file": "Pth கோப்பு",
+ "Output of the pth file": "Pth கோப்பின் வெளியேற்றம்",
+ "# How to Report an Issue on GitHub": "# GitHub-ல் ஒரு பிரச்சினையை புகாரளிக்குவது எப்படி",
+ "1. Click on the 'Record Screen' button below to start recording the issue you are experiencing.": "1. நீங்கள் அந்தப் பிரச்சினையை பரிசோதிக்கும் கீழே 'திரையை பதிகம் செய்யும்' பொத்தானை கிளிக் செய்யவும்.",
+ "2. Once you have finished recording the issue, click on the 'Stop Recording' button (the same button, but the label changes depending on whether you are actively recording or not).": "2. நீங்கள் அந்தப் பிரச்சினையை பரிசோதித்துக் கொண்டிருக்கின்றீர்கள், அந்தச் செய்யப்படும் பொத்தானை கிளிக் செய்யவும் (இது நீங்கள் சொல்லப்படும் பொத்தான், ஆனால் நீங்கள் எந்தவேணையும் செய்யக்கூடிய நிலையின் போது பொத்தானின் பெயர் மாறுகின்றது).",
+ "3. Go to [GitHub Issues](https://github.com/IAHispano/Applio/issues) and click on the 'New Issue' button.": "3. [GitHub Issues](https://github.com/IAHispano/Applio/issues) க்கு செல்லவும் மற்றும் 'புதிய பிரச்சினை' பொத்தானை கிளிக் செய்யவும்.",
+ "4. Complete the provided issue template, ensuring to include details as needed, and utilize the assets section to upload the recorded file from the previous step.": "4. வழுதுணர்ந்து, தேவையான விவரங்களைக் கொண்டு விரிவாக பிரச்சினை பதிவேடு செய்து, முந்தைய படித்த கோப்பை பதிவேடுக்கு பயன்படுத்தலாம்.",
+ "Record Screen": "திரையை பதிகம் செய்க",
+ "Record": "பதிகம் செய்க",
+ "Stop Recording": "பதிகம் நிறுத்துக",
+ "Introduce the model .pth path": "மாதிரி .pth பாதையை உள்ளிடுக",
+ "See Model Information": "மாதிரி தகவலைக் காண்க",
+ "## Download Model": "## மாதிரி பதிவிறக்கம்",
+ "Model Link": "மாதிரி இணைப்பு",
+ "Introduce the model link": "மாதிரி இணைப்பை உள்ளிடுக",
+ "Download Model": "மாதிரி பதிவிறக்கம்",
+ "## Drop files": "## கோப்புகளை விழுக",
+ "Drag your .pth file and .index file into this space. Drag one and then the other.": "உங்கள் .pth கோப்பு மற்றும் .index கோப்பை இந்த இடத்திற்கு எழுதுங்கள். ஒருவருக்கு பிறகு ஒருவருக்கு எழுதுங்கள்.",
+ "## Search Model": "## மாதிரி தேடு",
+ "Search": "தேடு",
+ "Introduce the model name to search.": "தேடுவதற்கு மாதிரிப் பெயரை அறிமுகப்படுத்து.",
+ "We couldn't find models by that name.": "அந்த பெயரில் மாதிரிகளைக் கண்டுபிடிக்க முடியவில்லை.",
+
+ "TTS Voices": "TTS குரல்கள்",
+ "Select the TTS voice to use for the conversion.": "மாற்றத்திற்கு பயன்படுத்த உள்ள TTS குரலை தேர்ந்தெடுக்கவும்.",
+ "Text to Synthesize": "சிந்தனை செய்ய உள்ள உரை",
+ "Enter the text to synthesize.": "சிந்தனை செய்ய உள்ள உரையை உள்ளிடுக.",
+ "Or you can upload a .txt file": "அல்லது .txt கோப்பை பதிவேற்றலாம்",
+ "Enter text to synthesize": "சிந்தனை செய்ய உள்ள உரையை உள்ளிடுக",
+ "Output Path for TTS Audio": "TTS குரலுக்கான வெளியேற்ற பாதை",
+ "Output Path for RVC Audio": "RVC குரலுக்கான வெளியேற்ற பாதை",
+ "Enable Applio integration with Discord presence": "Discord உள்ளிட்டது உள்ளிடத்துடன் Applio ஒருவருக்கு இயங்குதல் இயல்புநிலை செய்தியை இயக்குங்கள்",
+ "It will activate the possibility of displaying the current Applio activity in Discord.": "இது Applio செயல்திறனை Discord-ல் காண்பிக்க முடியும்.",
+ "Enable Applio integration with applio.org/models using flask": "flask ஐப் பயன்படுத்தி applio.org/models உடன் Applio ஒருவருக்கு இயங்குதல் இயல்புநிலை செய்தியை இயக்குங்கள்",
+ "It will activate the possibility of downloading models with a click from the website.": "இது இணையத்திலிருந்து ஒரு கிளிக்குட்டுடன் மாதிரிகளை பதிவிறக்க முடியும்.",
+ "Enable fake GPU": "கப்பூ ஜி.பி.யூ ஐ இயக்கு",
+ "Training is currently unsupported due to the absence of a GPU. To activate the training tab, navigate to the settings tab and enable the 'Fake GPU' option.": "ஒரு ஜி.பி.யூ இல்லாமல் தற்போது பயிற்சிப்படுத்தல் ஆதரிக்கப்படவில்லை. பயிற்சிக்கு பட்டி செயல்முறையை செயலாக்க, அமைப்புகள் பட்டியலில் செல்ல, 'பெயர் ஜி.பி.யூ' விருப்பத்தை இயக்கவும்.",
+ "Activates the train tab. However, please note that this device lacks GPU capabilities, hence training is not supported. This option is only for testing purposes. (This option will restart Applio)": "பயிற்சிக்கு பட்டி செயலாக்கிறது. எனவே, இந்த சாதனம் ஜி.பி.யூ திறன் இல்லையாம், அதனால் பயிற்சி ஆதரிக்கப்படவில்லை. இந்த விருப்பம் மட்டுமே சோதனை காரணங்களுக்காக உள்ளது. (இந்த விருப்பம் Applio ஐ மீண்டும் திரும்பியிருப்பதற்காக)",
+ "Theme": "தீமா",
+ "Select the theme you want to use. (Requires restarting Applio)": "நீங்கள் பயன்படுத்த விரும்பும் தீமையை தேர்ந்தெடுக்கவும். (Applio-ஐ மீளவே போகும்)",
+ "Language": "மொழி",
+ "Select the language you want to use. (Requires restarting Applio)": "நீங்கள் பயன்படுத்த விரும்பும் மொழியை தேர்ந்தெடுக்கவும். (Applio-ஐ மீளவே போகும்)",
+ "Plugin Installer": "பிளகின் நிறுவி",
+ "Drag your plugin.zip to install it": "உங்கள் plugin.zip கோப்பை இதுக்கு இழுக்கவும் அதை நிறுவுக",
+ "Version Checker": "பதிப்பு சரிபார்க்கல்",
+ "Check which version of Applio is the latest to see if you need to update.": "நீங்கள் புதியதாகப் புதுப்பிக்க வேண்டும் என்பதை பார்க்க, Applio இன் எந்த பதிப்பு சரிபார்க்கவும்.",
+ "Check for updates": "புதுப்பிக்கவும்"
+}
\ No newline at end of file
diff --git a/assets/i18n/languages/ta_TA.json b/assets/i18n/languages/ta_TA.json
new file mode 100644
index 0000000000000000000000000000000000000000..27cd4e3db52fbced9c6b3f1e2082fa41cd26adb4
--- /dev/null
+++ b/assets/i18n/languages/ta_TA.json
@@ -0,0 +1,175 @@
+{
+ "VITS-based Voice Conversion focused on simplicity, quality and performance.": "இறுதி குரல் குளோனிங் கருவி, நிகரற்ற சக்தி, மாடுலாரிட்டி மற்றும் பயனர் நட்பு அனுபவத்திற்காக உன்னிப்பாக உகந்ததாக உள்ளது.",
+ "This section contains some extra utilities that often may be in experimental phases.": "இந்த பிரிவில் சில கூடுதல் பயன்பாடுகள் உள்ளன, அவை பெரும்பாலும் சோதனை கட்டங்களில் இருக்கலாம்.",
+ "Output Information": "வெளியீட்டு தகவல்",
+ "The output information will be displayed here.": "வெளியீட்டு தகவல் இங்கே காட்டப்படும்.",
+ "Inference": "அனுமானம்",
+ "Train": "தொடர்வண்டி",
+ "Extra": "கூடுதல்",
+ "Merge Audios": "ஆடியோக்களை ஒன்றிணைக்கவும்",
+ "Processing": "செயலாக்க",
+ "Audio Analyzer": "ஆடியோ அனலைசர்",
+ "Model Information": "மாதிரி தகவல்",
+ "Plugins": "செருகுநிரல்கள்",
+ "Download": "பதிவிறக்க",
+ "Report a Bug": "பிழையைப் புகாரளி",
+ "Settings": "அமைப்புகள்",
+ "Preprocess": "முன் செயல்முறை",
+ "Model Name": "மாடல் பெயர்",
+ "Name of the new model.": "புதிய மாடலின் பெயர்.",
+ "Enter model name": "மாடல் பெயரை உள்ளிடவும்",
+ "Dataset Path": "தரவுத்தொகுப்பு பாதை",
+ "Path to the dataset folder.": "தரவுத்தொகுப்பு கோப்புறைக்கான பாதை.",
+ "Refresh Datasets": "தரவுத்தொகுப்புகளைப் புதுப்பிக்கவும்",
+ "Dataset Creator": "தரவுத்தொகுப்பை உருவாக்கியவர்",
+ "Dataset Name": "தரவுத்தொகுப்பு பெயர்",
+ "Name of the new dataset.": "புதிய தரவுத்தொகுப்பின் பெயர்.",
+ "Enter dataset name": "தரவுத்தொகுப்பு பெயரை உள்ளிடவும்",
+ "Upload Audio Dataset": "ஆடியோ தரவுத்தொகுப்பைப் பதிவேற்றவும்",
+ "The audio file has been successfully added to the dataset. Please click the preprocess button.": "ஆடியோ கோப்பு வெற்றிகரமாக தரவுத்தொகுப்பில் சேர்க்கப்பட்டது. செயல்முறைக்கு முந்தைய பட்டனை கிளிக் செய்யவும்.",
+ "Enter dataset path": "தரவுத்தொகுப்பு பாதையை உள்ளிடவும்",
+ "Sampling Rate": "மாதிரி மதிப்பீடு",
+ "The sampling rate of the audio files.": "ஆடியோ கோப்புகளின் மாதிரி விகிதம்.",
+ "Model Architecture": "RVC பதிப்பு",
+ "Version of the model architecture.": "மாடலின் RVC பதிப்பு.",
+ "Preprocess Dataset": "செயல்முறைக்கு முந்தைய தரவுத்தொகுப்பு",
+ "Extract": "பிரித்தெடுத்தல்",
+ "Hop Length": "ஹாப் நீளம்",
+ "Denotes the duration it takes for the system to transition to a significant pitch change. Smaller hop lengths require more time for inference but tend to yield higher pitch accuracy.": "கணினி ஒரு குறிப்பிடத்தக்க சுருதி மாற்றத்திற்கு மாறுவதற்கு எடுக்கும் காலத்தைக் குறிக்கிறது. சிறிய ஹாப் நீளங்களுக்கு அனுமானத்திற்கு அதிக நேரம் தேவைப்படுகிறது, ஆனால் அதிக சுருதி துல்லியத்தை அளிக்க முனைகிறது.",
+ "Batch Size": "தொகுதி அளவு",
+ "It's advisable to align it with the available VRAM of your GPU. A setting of 4 offers improved accuracy but slower processing, while 8 provides faster and standard results.": "உங்கள் GPU இன் கிடைக்கக்கூடிய VRAM உடன் அதை சீரமைப்பது நல்லது. 4 இன் அமைப்பு மேம்பட்ட துல்லியம் ஆனால் மெதுவான செயலாக்கத்தை வழங்குகிறது, அதே நேரத்தில் 8 வேகமான மற்றும் நிலையான முடிவுகளை வழங்குகிறது.",
+ "Save Every Epoch": "ஒவ்வொரு சகாப்தத்தையும் காப்பாற்றுங்கள்",
+ "Determine at how many epochs the model will saved at.": "மாதிரி எத்தனை சகாப்தங்களில் சேமிக்கப்படும் என்பதை தீர்மானிக்கவும்.",
+ "Total Epoch": "மொத்த சகாப்தம்",
+ "Specifies the overall quantity of epochs for the model training process.": "மாதிரி பயிற்சி செயல்முறைக்கான சகாப்தங்களின் ஒட்டுமொத்த அளவைக் குறிப்பிடுகிறது.",
+ "Pretrained": "முன் பயிற்சி",
+ "Save Only Latest": "சமீபத்தியதை மட்டும் சேமிக்கவும்",
+ "Enabling this setting will result in the G and D files saving only their most recent versions, effectively conserving storage space.": "இந்த அமைப்பை இயக்குவது G மற்றும் D கோப்புகளை அவற்றின் மிகச் சமீபத்திய பதிப்புகளை மட்டுமே சேமிக்கும், சேமிப்பக இடத்தை திறம்பட சேமிக்கும்.",
+ "Save Every Weights": "ஒவ்வொரு எடைகளையும் சேமிக்கவும்",
+ "This setting enables you to save the weights of the model at the conclusion of each epoch.": "இந்த அமைப்பு ஒவ்வொரு சகாப்தத்தின் முடிவிலும் மாதிரியின் எடைகளை சேமிக்க உதவுகிறது.",
+ "Custom Pretrained": "தனிப்பயன் முன்பயிற்சி",
+ "Utilizing custom pretrained models can lead to superior results, as selecting the most suitable pretrained models tailored to the specific use case can significantly enhance performance.": "தனிப்பயன் முன்பயிற்சி மாதிரிகளைப் பயன்படுத்துவது சிறந்த முடிவுகளுக்கு வழிவகுக்கும், ஏனெனில் குறிப்பிட்ட பயன்பாட்டு வழக்குக்கு ஏற்ப மிகவும் பொருத்தமான முன்பயிற்சி மாதிரிகளைத் தேர்ந்தெடுப்பது செயல்திறனை கணிசமாக மேம்படுத்தும்.",
+ "Upload Pretrained Model": "முன்பயிற்சி பெற்ற மாதிரியைப் பதிவேற்றவும்",
+ "Refresh Custom Pretraineds": "தனிப்பயன் ப்ரீட்ரெய்ன்ட்களைப் புதுப்பிக்கவும்",
+ "Pretrained Custom Settings": "முன் பயிற்சி பெற்ற தனிப்பயன் அமைப்புகள்",
+ "The file you dropped is not a valid pretrained file. Please try again.": "நீங்கள் கைவிட்ட கோப்பு ஒரு செல்லத்தக்க முன்பயிற்சி பெற்ற கோப்பு அல்ல. மீண்டும் முயற்சிக்கவும்.",
+ "Click the refresh button to see the pretrained file in the dropdown menu.": "கீழ்தோன்றும் மெனுவில் முன்பயிற்சி பெற்ற கோப்பைக் காண புதுப்பிப்பு பொத்தானைக் கிளிக் செய்யவும்.",
+ "Pretrained G Path": "தனிப்பயன் முன் பயிற்சி ஜி",
+ "Pretrained D Path": "தனிப்பயன் முன்பயிற்சி D",
+ "GPU Settings": "GPU அமைப்புகள்",
+ "Sets advanced GPU settings, recommended for users with better GPU architecture.": "மேம்பட்ட GPU அமைப்புகளை அமைக்கிறது, சிறந்த GPU கட்டமைப்பு கொண்ட பயனர்களுக்கு பரிந்துரைக்கப்படுகிறது.",
+ "GPU Custom Settings": "GPU தனிப்பயன் அமைப்புகள்",
+ "GPU Number": "GPU எண்",
+ "0 to ∞ separated by -": "0 முதல் ∞ வரை பிரிக்கப்பட்டுள்ளது -",
+ "GPU Information": "தகவல்",
+ "Pitch Guidance": "சுருதி வழிகாட்டுதல்",
+ "By employing pitch guidance, it becomes feasible to mirror the intonation of the original voice, including its pitch. This feature is particularly valuable for singing and other scenarios where preserving the original melody or pitch pattern is essential.": "சுருதி வழிகாட்டுதலைப் பயன்படுத்துவதன் மூலம், அதன் சுருதி உட்பட அசல் குரலின் தொனியை பிரதிபலிப்பது சாத்தியமாகும். அசல் மெல்லிசை அல்லது சுருதி வடிவத்தைப் பாதுகாப்பது அவசியம் என்று பாடுவதற்கும் பிற காட்சிகளுக்கும் இந்த அம்சம் குறிப்பாக மதிப்புமிக்கது.",
+ "Utilize pretrained models when training your own. This approach reduces training duration and enhances overall quality.": "உங்கள் சொந்த பயிற்சியின் போது முன்கூட்டியே பயிற்சி பெற்ற மாதிரிகளைப் பயன்படுத்தவும். இந்த அணுகுமுறை பயிற்சி காலத்தை குறைக்கிறது மற்றும் ஒட்டுமொத்த தரத்தை மேம்படுத்துகிறது.",
+ "Extract Features": "பிரித்தெடுக்கும் அம்சங்கள்",
+ "Start Training": "பயிற்சியைத் தொடங்குங்கள்",
+ "Generate Index": "குறியீட்டை உருவாக்கவும்",
+ "Voice Model": "குரல் மாதிரி",
+ "Select the voice model to use for the conversion.": "மாற்றத்திற்கு பயன்படுத்த குரல் மாதிரியைத் தேர்ந்தெடுக்கவும்.",
+ "Index File": "அட்டவணை கோப்பு",
+ "Select the index file to use for the conversion.": "மாற்றத்திற்கு பயன்படுத்த குறியீட்டு கோப்பைத் தேர்ந்தெடுக்கவும்.",
+ "Refresh": "புதுப்பி",
+ "Unload Voice": "குரலை இறக்கவும்",
+ "Single": "ஒற்றை",
+ "Upload Audio": "ஆடியோவை பதிவேற்றவும்",
+ "Select Audio": "ஆடியோவை தேர்ந்தெடு",
+ "Select the audio to convert.": "மாற்ற ஆடியோவைத் தேர்ந்தெடுக்கவும்.",
+ "Advanced Settings": "மேம்பட்ட அமைப்புகள்",
+ "Clear Outputs (Deletes all audios in assets/audios)": "வெளியீடுகளை அழிக்கவும் (சொத்துக்கள் / ஆடியோக்களில் உள்ள அனைத்து ஆடியோக்களையும் நீக்குகிறது)",
+ "Custom Output Path": "தனிப்பயன் வெளியீட்டு பாதை",
+ "Output Path": "வெளியீட்டுப் பாதை",
+ "The path where the output audio will be saved, by default in assets/audios/output.wav": "வெளியீட்டு ஆடியோ சேமிக்கப்படும் பாதை, இயல்பாக சொத்துக்கள் / ஆடியோக்கள் / output.wav",
+ "Split Audio": "பிளவு ஆடியோ",
+ "Split the audio into chunks for inference to obtain better results in some cases.": "சில சந்தர்ப்பங்களில் சிறந்த முடிவுகளைப் பெற அனுமானத்திற்காக ஆடியோவை துண்டுகளாகப் பிரிக்கவும்.",
+ "Autotune": "ஆட்டோடியூன்",
+ "Apply a soft autotune to your inferences, recommended for singing conversions.": "உங்கள் அனுமானங்களுக்கு ஒரு மென்மையான ஆட்டோட்யூனைப் பயன்படுத்துங்கள், பாடல் மாற்றங்களுக்கு பரிந்துரைக்கப்படுகிறது.",
+ "Clean Audio": "சுத்தமான ஆடியோ",
+ "Clean your audio output using noise detection algorithms, recommended for speaking audios.": "ஆடியோக்களைப் பேசுவதற்குப் பரிந்துரைக்கப்படும் இரைச்சல் கண்டறிதல் அல்காரிதம்களைப் பயன்படுத்தி உங்கள் ஆடியோ அவுட்புட்டை சுத்தம் செய்யவும்.",
+ "Clean Strength": "சுத்தமான வலிமை",
+ "Set the clean-up level to the audio you want, the more you increase it the more it will clean up, but it is possible that the audio will be more compressed.": "நீங்கள் விரும்பும் ஆடியோவுக்கு சுத்தம் செய்யும் அளவை அமைக்கவும், நீங்கள் அதை எவ்வளவு அதிகரிக்கிறீர்களோ, அவ்வளவு அதிகமாக அது சுத்தம் செய்யப்படும், ஆனால் ஆடியோ மிகவும் சுருக்கப்பட்டிருக்கும்.",
+ "Pitch": "எறி",
+ "Set the pitch of the audio, the higher the value, the higher the pitch.": "ஆடியோவின் சுருதியை அமைக்கவும், அதிக மதிப்பு, அதிக சுருதி.",
+ "Filter Radius": "வடிகட்டி ஆரம்",
+ "If the number is greater than or equal to three, employing median filtering on the collected tone results has the potential to decrease respiration.": "எண்ணிக்கை மூன்றை விட அதிகமாகவோ அல்லது சமமாகவோ இருந்தால், சேகரிக்கப்பட்ட தொனி முடிவுகளில் சராசரி வடிகட்டலைப் பயன்படுத்துவது சுவாசத்தைக் குறைக்கும் ஆற்றலைக் கொண்டுள்ளது.",
+ "Search Feature Ratio": "தேடல் அம்ச விகிதம்",
+ "Influence exerted by the index file; a higher value corresponds to greater influence. However, opting for lower values can help mitigate artifacts present in the audio.": "குறியீட்டு கோப்பு செலுத்தும் செல்வாக்கு; அதிக மதிப்பு அதிக செல்வாக்குக்கு ஒத்திருக்கிறது. இருப்பினும், குறைந்த மதிப்புகளைத் தேர்ந்தெடுப்பது ஆடியோவில் இருக்கும் கலைப்பொருட்களைத் தணிக்க உதவும்.",
+ "Volume Envelope": "தொகுதி உறை",
+ "Substitute or blend with the volume envelope of the output. The closer the ratio is to 1, the more the output envelope is employed.": "வெளியீட்டின் தொகுதி உறையுடன் மாற்றவும் அல்லது கலக்கவும். விகிதம் 1 க்கு நெருக்கமாக இருந்தால், வெளியீடு உறை அதிகமாக பயன்படுத்தப்படுகிறது.",
+ "Protect Voiceless Consonants": "குரலற்ற மெய்யெழுத்துக்களைப் பாதுகாக்கவும்",
+ "Safeguard distinct consonants and breathing sounds to prevent electro-acoustic tearing and other artifacts. Pulling the parameter to its maximum value of 0.5 offers comprehensive protection. However, reducing this value might decrease the extent of protection while potentially mitigating the indexing effect.": "மின்-ஒலி கிழித்தல் மற்றும் பிற கலைப்பொருட்களைத் தடுக்க தனித்துவமான மெய்யெழுத்துக்கள் மற்றும் சுவாச ஒலிகளைப் பாதுகாக்கவும். அளவுருவை அதன் அதிகபட்ச மதிப்பான 0.5 க்கு இழுப்பது விரிவான பாதுகாப்பை வழங்குகிறது. இருப்பினும், இந்த மதிப்பைக் குறைப்பது பாதுகாப்பின் அளவைக் குறைக்கலாம், அதே நேரத்தில் குறியீட்டு விளைவைத் தணிக்கலாம்.",
+ "Pitch extraction algorithm": "சுருதி பிரித்தெடுத்தல் அல்காரிதம்",
+ "Pitch extraction algorithm to use for the audio conversion. The default algorithm is rmvpe, which is recommended for most cases.": "ஆடியோ மாற்றத்திற்கு பயன்படுத்த சுருதி பிரித்தெடுத்தல் வழிமுறை. இயல்புநிலை அல்காரிதம் rmvpe ஆகும், இது பெரும்பாலான சந்தர்ப்பங்களில் பரிந்துரைக்கப்படுகிறது.",
+ "Convert": "உருமாற்று",
+ "Export Audio": "ஆடியோவை ஏற்றுமதி செய்யவும்",
+ "Batch": "தொகுதி",
+ "Input Folder": "உள்ளீட்டு கோப்புறை",
+ "Select the folder containing the audios to convert.": "மாற்ற ஆடியோக்களைக் கொண்ட கோப்புறையைத் தேர்ந்தெடுக்கவும்.",
+ "Enter input path": "உள்ளீட்டு பாதையை உள்ளிடவும்",
+ "Output Folder": "வெளியீட்டு கோப்புறை",
+ "Select the folder where the output audios will be saved.": "வெளியீடு ஆடியோக்கள் சேமிக்கப்படும் கோப்புறையைத் தேர்ந்தெடுக்கவும்.",
+ "Enter output path": "வெளியீட்டு பாதையை உள்ளிடவும்",
+ "Get information about the audio": "ஆடியோ பற்றிய தகவலைப் பெறுங்கள்",
+ "Information about the audio file": "ஆடியோ கோப்பு பற்றிய தகவல்",
+ "Waiting for information...": "தகவலுக்காக காத்திருக்கிறேன்...",
+ "## Voice Blender": "## வாய்ஸ் பிளெண்டர்",
+ "Select two voice models, set your desired blend percentage, and blend them into an entirely new voice.": "இரண்டு குரல் மாதிரிகளைத் தேர்ந்தெடுத்து, நீங்கள் விரும்பிய கலவை சதவீதத்தை அமைத்து, அவற்றை முற்றிலும் புதிய குரலில் கலக்கவும்.",
+ "Voice Blender": "குரல் பிளெண்டர்",
+ "Drag and drop your model here": "உங்கள் மாதிரியை இங்கே இழுத்து விடுங்கள்",
+ "You can also use a custom path.": "நீங்கள் தனிப்பயன் பாதையையும் பயன்படுத்தலாம்.",
+ "Blend Ratio": "கலப்பு விகிதம்",
+ "Adjusting the position more towards one side or the other will make the model more similar to the first or second.": "நிலையை ஒரு பக்கம் அல்லது மறுபுறம் நோக்கி சரிசெய்வது மாதிரியை முதல் அல்லது இரண்டாவதைப் போலவே மாற்றும்.",
+ "Fusion": "இணைவு",
+ "Path to Model": "மாதிரிக்கான பாதை",
+ "Enter path to model": "மாதிரிக்கான பாதையை உள்ளிடவும்",
+ "Model information to be placed": "வைக்கப்பட வேண்டிய மாதிரி தகவல்",
+ "Inroduce the model information": "மாதிரி தகவலை அறிமுகப்படுத்தவும்",
+ "The information to be placed in the model (You can leave it blank or put anything).": "மாதிரியில் வைக்கப்பட வேண்டிய தகவல் (நீங்கள் அதை காலியாக விடலாம் அல்லது எதையும் வைக்கலாம்).",
+ "View model information": "மாதிரி தகவலைக் காண்க",
+ "Introduce the model pth path": "மாதிரி pth பாதையை அறிமுகப்படுத்தவும்",
+ "View": "பார்வை",
+ "Model extraction": "மாதிரி பிரித்தெடுத்தல்",
+ "Model conversion": "மாதிரி மாற்றம்",
+ "Pth file": "Pth கோப்பு",
+ "Output of the pth file": "pth கோப்பின் வெளியீடு",
+ "# How to Report an Issue on GitHub": "# GitHub இல் ஒரு சிக்கலை எவ்வாறு புகாரளிப்பது",
+ "1. Click on the 'Record Screen' button below to start recording the issue you are experiencing.": "1. நீங்கள் அனுபவிக்கும் சிக்கலைப் பதிவு செய்யத் தொடங்க கீழே உள்ள 'ரெக்கார்ட் ஸ்கிரீன்' பொத்தானைக் கிளிக் செய்க.",
+ "2. Once you have finished recording the issue, click on the 'Stop Recording' button (the same button, but the label changes depending on whether you are actively recording or not).": "2. நீங்கள் சிக்கலைப் பதிவு செய்து முடித்ததும், 'பதிவு செய்வதை நிறுத்து' பொத்தானைக் கிளிக் செய்க (அதே பொத்தான், ஆனால் நீங்கள் தீவிரமாக பதிவு செய்கிறீர்களா இல்லையா என்பதைப் பொறுத்து லேபிள் மாறுகிறது).",
+ "3. Go to [GitHub Issues](https://github.com/IAHispano/Applio/issues) and click on the 'New Issue' button.": "3. [GitHub Issues](https://github.com/IAHispano/Applio/issues) என்பதற்குச் சென்று 'புதிய சிக்கல்' பொத்தானைக் கிளிக் செய்யவும்.",
+ "4. Complete the provided issue template, ensuring to include details as needed, and utilize the assets section to upload the recorded file from the previous step.": "4. வழங்கப்பட்ட சிக்கல் வார்ப்புருவை முடிக்கவும், தேவைக்கேற்ப விவரங்களைச் சேர்ப்பதை உறுதிசெய்து, முந்தைய கட்டத்திலிருந்து பதிவுசெய்யப்பட்ட கோப்பை பதிவேற்ற சொத்துக்கள் பிரிவைப் பயன்படுத்தவும்.",
+ "Record Screen": "பதிவு திரை",
+ "Record": "பதிவேடு",
+ "Stop Recording": "பதிவு செய்வதை நிறுத்து",
+ "Introduce the model .pth path": "மாதிரியை அறிமுகப்படுத்துங்கள் .pth பாதையை அறிமுகப்படுத்துங்கள்",
+ "See Model Information": "மாதிரி தகவலைப் பார்க்கவும்",
+ "## Download Model": "## பதிவிறக்க மாதிரி",
+ "Model Link": "மாதிரி இணைப்பு",
+ "Introduce the model link": "மாதிரி இணைப்பை அறிமுகப்படுத்தவும்",
+ "Download Model": "மாடலைப் பதிவிறக்கவும்",
+ "## Drop files": "## கோப்புகளை கைவிடுங்கள்",
+ "Drag your .pth file and .index file into this space. Drag one and then the other.": "உங்கள் .pth கோப்பு மற்றும் .index கோப்பை இந்த இடத்திற்கு இழுக்கவும். ஒன்றை இழுத்து மற்றொன்றை இழுக்கவும்.",
+ "TTS Voices": "TTS குரல்கள்",
+ "Select the TTS voice to use for the conversion.": "மாற்றத்திற்கு பயன்படுத்த TTS குரலைத் தேர்ந்தெடுக்கவும்.",
+ "Text to Synthesize": "தொகுக்க உரை",
+ "Enter the text to synthesize.": "ஒருங்கிணைக்க உரையை உள்ளிடவும்.",
+ "Or you can upload a .txt file": "அல்லது .txt கோப்பை பதிவேற்றலாம்",
+ "Enter text to synthesize": "ஒருங்கிணைக்க உரையை உள்ளிடவும்",
+ "Output Path for TTS Audio": "TTS ஆடியோவுக்கான வெளியீட்டு பாதை",
+ "Output Path for RVC Audio": "RVC ஆடியோவுக்கான வெளியீட்டு பாதை",
+ "Enable Applio integration with Discord presence": "Discord இருப்புடன் Applio ஒருங்கிணைப்பை இயக்கவும்",
+ "It will activate the possibility of displaying the current Applio activity in Discord.": "டிஸ்கார்டில் தற்போதைய Applio செயல்பாட்டைக் காண்பிப்பதற்கான வாய்ப்பை இது செயல்படுத்தும்.",
+ "Enable Applio integration with applio.org/models using flask": "குடுவையைப் பயன்படுத்தி applio.org/models உடன் அப்லியோ ஒருங்கிணைப்பை இயக்கவும்",
+ "It will activate the possibility of downloading models with a click from the website.": "இணையதளத்தில் இருந்து ஒரு கிளிக்கில் மாடல்களைப் பதிவிறக்கும் வாய்ப்பை இது செயல்படுத்தும்.",
+ "Theme": "கருப்பொருள்",
+ "Select the theme you want to use. (Requires restarting Applio)": "நீங்கள் பயன்படுத்த விரும்பும் கருப்பொருளை தேர்ந்தெடுக்கவும். (அப்ளியோவை மறுதொடக்கம் செய்ய வேண்டும்)",
+ "Language": "மொழி",
+ "Select the language you want to use. (Requires restarting Applio)": "நீங்கள் பயன்படுத்த விரும்பும் மொழியைத் தேர்ந்தெடுக்கவும். (அப்ளியோவை மறுதொடக்கம் செய்ய வேண்டும்)",
+ "Plugin Installer": "செருகுநிரல் நிறுவி",
+ "Drag your plugin.zip to install it": "அதை நிறுவ உங்கள் plugin.zip இழுக்கவும்",
+ "Version Checker": "பதிப்பு சரிபார்ப்பு",
+ "Check which version of Applio is the latest to see if you need to update.": "நீங்கள் புதுப்பிக்க வேண்டுமா என்பதைப் பார்க்க அப்லியோவின் எந்த பதிப்பு சமீபத்தியது என்பதைச் சரிபார்க்கவும்.",
+ "Check for updates": "புதுப்பிப்புகளைச் சரிபார்க்கவும்"
+}
\ No newline at end of file
diff --git a/assets/i18n/languages/te_TE.json b/assets/i18n/languages/te_TE.json
new file mode 100644
index 0000000000000000000000000000000000000000..290be7405222f98da9e832ffbe4ffa8da53493ec
--- /dev/null
+++ b/assets/i18n/languages/te_TE.json
@@ -0,0 +1,175 @@
+{
+ "VITS-based Voice Conversion focused on simplicity, quality and performance.": "అల్టిమేట్ వాయిస్ క్లోనింగ్ టూల్, సాటిలేని శక్తి, మాడ్యులారిటీ మరియు వినియోగదారు-స్నేహపూర్వక అనుభవం కోసం జాగ్రత్తగా ఆప్టిమైజ్ చేయబడింది.",
+ "This section contains some extra utilities that often may be in experimental phases.": "ఈ విభాగంలో కొన్ని అదనపు ఉపయోగాలు ఉన్నాయి, అవి తరచుగా ప్రయోగాత్మక దశలలో ఉండవచ్చు.",
+ "Output Information": "అవుట్ పుట్ సమాచారం",
+ "The output information will be displayed here.": "అవుట్ పుట్ సమాచారం ఇక్కడ ప్రదర్శించబడుతుంది.",
+ "Inference": "అనుమితి",
+ "Train": "శిక్షణ",
+ "Extra": "అదనం",
+ "Merge Audios": "ఆడియోలను విలీనం చేయండి",
+ "Processing": "ప్రాసెసింగ్",
+ "Audio Analyzer": "Audio Analyzer",
+ "Model Information": "నమూనా సమాచారం",
+ "Plugins": "Plugins",
+ "Download": "డౌన్ లోడ్ చేసుకోండి",
+ "Report a Bug": "బగ్ ని రిపోర్ట్ చేయండి",
+ "Settings": "సెట్టింగ్ లు",
+ "Preprocess": "ప్రీప్రాసెస్",
+ "Model Name": "మోడల్ పేరు",
+ "Name of the new model.": "కొత్త మోడల్ పేరు.",
+ "Enter model name": "మోడల్ పేరు నమోదు చేయండి",
+ "Dataset Path": "Dataset Path",
+ "Path to the dataset folder.": "డేటాసెట్ ఫోల్డర్ కు మార్గం.",
+ "Refresh Datasets": "రిఫ్రెష్ డేటాసెట్ లు",
+ "Dataset Creator": "డేటాసెట్ సృష్టికర్త",
+ "Dataset Name": "డేటాసెట్ పేరు",
+ "Name of the new dataset.": "కొత్త డేటాసెట్ పేరు.",
+ "Enter dataset name": "డేటాసెట్ పేరును నమోదు చేయండి",
+ "Upload Audio Dataset": "ఆడియో డేటాసెట్ అప్ లోడ్ చేయండి",
+ "The audio file has been successfully added to the dataset. Please click the preprocess button.": "ఆడియో ఫైల్ డేటాసెట్ కు విజయవంతంగా జోడించబడింది. దయచేసి ప్రీప్రాసెస్ బటన్ మీద క్లిక్ చేయండి.",
+ "Enter dataset path": "డేటాసెట్ మార్గాన్ని నమోదు చేయండి",
+ "Sampling Rate": "నమూనా రేటు",
+ "The sampling rate of the audio files.": "ఆడియో ఫైల్స్ యొక్క నమూనా రేటు.",
+ "Model Architecture": "Model Architecture",
+ "Version of the model architecture.": "మోడల్ యొక్క ఆర్ విసి వెర్షన్.",
+ "Preprocess Dataset": "ప్రీప్రాసెస్ Dataset",
+ "Extract": "ఉద్ధరించు",
+ "Hop Length": "హాప్ పొడవు",
+ "Denotes the duration it takes for the system to transition to a significant pitch change. Smaller hop lengths require more time for inference but tend to yield higher pitch accuracy.": "గణనీయమైన పిచ్ మార్పుకు సిస్టమ్ పరివర్తన చెందడానికి పట్టే వ్యవధిని సూచిస్తుంది. చిన్న హాప్ పొడవులు అంచనా వేయడానికి ఎక్కువ సమయం అవసరం, కానీ అధిక పిచ్ ఖచ్చితత్వాన్ని ఇస్తాయి.",
+ "Batch Size": "బ్యాచ్ పరిమాణం",
+ "It's advisable to align it with the available VRAM of your GPU. A setting of 4 offers improved accuracy but slower processing, while 8 provides faster and standard results.": "దీన్ని మీ జీపీయూలో అందుబాటులో ఉన్న వీఆర్ఏఎంతో అనుసంధానం చేసుకోవడం మంచిది. 4 యొక్క అమరిక మెరుగైన ఖచ్చితత్వాన్ని అందిస్తుంది కాని నెమ్మదిగా ప్రాసెసింగ్ చేస్తుంది, అయితే 8 వేగవంతమైన మరియు ప్రామాణిక ఫలితాలను అందిస్తుంది.",
+ "Save Every Epoch": "ప్రతి యుగాన్ని సేవ్ చేయండి",
+ "Determine at how many epochs the model will saved at.": "మోడల్ ఎన్ని యుగాలలో సేవ్ చేయబడుతుందో నిర్ణయించండి.",
+ "Total Epoch": "మొత్తం యుగం",
+ "Specifies the overall quantity of epochs for the model training process.": "మోడల్ ట్రైనింగ్ ప్రాసెస్ కొరకు యుగాల యొక్క మొత్తం పరిమాణాన్ని నిర్దేశిస్తుంది.",
+ "Pretrained": "ప్రీ ట్రైనింగ్ చేయబడింది",
+ "Save Only Latest": "సేవ్ ఓన్లీ లేటెస్ట్",
+ "Enabling this setting will result in the G and D files saving only their most recent versions, effectively conserving storage space.": "ఈ సెట్టింగ్ ని ఎనేబుల్ చేయడం వల్ల G మరియు D ఫైల్స్ వాటి ఇటీవలి వెర్షన్ లను మాత్రమే సేవ్ చేస్తాయి, స్టోరేజీ స్పేస్ ని సమర్థవంతంగా సంరక్షిస్తాయి.",
+ "Save Every Weights": "ప్రతి బరువులను సేవ్ చేయండి",
+ "This setting enables you to save the weights of the model at the conclusion of each epoch.": "ఈ అమరిక ప్రతి యుగం ముగింపులో మోడల్ యొక్క బరువులను సేవ్ చేయడానికి మిమ్మల్ని అనుమతిస్తుంది.",
+ "Custom Pretrained": "Custom Pretrained",
+ "Utilizing custom pretrained models can lead to superior results, as selecting the most suitable pretrained models tailored to the specific use case can significantly enhance performance.": "కస్టమ్ ప్రీ-ట్రైన్డ్ మోడళ్లను ఉపయోగించడం మెరుగైన ఫలితాలకు దారితీస్తుంది, ఎందుకంటే నిర్దిష్ట ఉపయోగం కేసుకు అనుగుణంగా అత్యంత తగిన ప్రీట్రైనింగ్ మోడళ్లను ఎంచుకోవడం పనితీరును గణనీయంగా మెరుగుపరుస్తుంది.",
+ "Upload Pretrained Model": "ప్రీ ట్రైన్డ్ మోడల్ అప్ లోడ్ చేయండి",
+ "Refresh Custom Pretraineds": "రిఫ్రెష్ కస్టమ్ ప్రీ ట్రైన్డ్ లు",
+ "Pretrained Custom Settings": "ముందస్తుగా శిక్షణ పొందిన కస్టమ్ సెట్టింగ్ లు",
+ "The file you dropped is not a valid pretrained file. Please try again.": "మీరు డ్రాప్ చేసిన ఫైల్ చెల్లుబాటు అయ్యే ముందస్తు శిక్షణ పొందిన ఫైల్ కాదు. దయచేసి మళ్లీ ప్రయత్నించండి.",
+ "Click the refresh button to see the pretrained file in the dropdown menu.": "డ్రాప్ డౌన్ మెనూలో ముందుగా శిక్షణ పొందిన ఫైల్ ను చూడటం కొరకు రిఫ్రెష్ బటన్ మీద క్లిక్ చేయండి.",
+ "Pretrained G Path": "Custom Pretrained G",
+ "Pretrained D Path": "Custom Pretrained D",
+ "GPU Settings": "GPU సెట్టింగ్ లు",
+ "Sets advanced GPU settings, recommended for users with better GPU architecture.": "మెరుగైన GPU ఆర్కిటెక్చర్ ఉన్న వినియోగదారుల కొరకు సిఫారసు చేయబడ్డ అధునాతన GPU సెట్టింగ్ లను సెట్ చేస్తుంది.",
+ "GPU Custom Settings": "GPU కస్టమ్ సెట్టింగ్ లు",
+ "GPU Number": "జిపియు నెంబరు",
+ "0 to ∞ separated by -": "0 నుండి ∞ వరకు దీని ద్వారా వేరు చేయబడతాయి -",
+ "GPU Information": "GPU సమాచారం",
+ "Pitch Guidance": "పిచ్ మార్గదర్శకత్వం",
+ "By employing pitch guidance, it becomes feasible to mirror the intonation of the original voice, including its pitch. This feature is particularly valuable for singing and other scenarios where preserving the original melody or pitch pattern is essential.": "పిచ్ మార్గదర్శకత్వాన్ని ఉపయోగించడం ద్వారా, దాని పిచ్తో సహా అసలు స్వరం యొక్క స్వరాన్ని ప్రతిబింబించడం సాధ్యమవుతుంది. అసలు మెలోడీ లేదా పిచ్ నమూనాను సంరక్షించడం అవసరమైన గానం మరియు ఇతర సందర్భాలకు ఈ లక్షణం ముఖ్యంగా విలువైనది.",
+ "Utilize pretrained models when training your own. This approach reduces training duration and enhances overall quality.": "మీ స్వంత శిక్షణ చేసేటప్పుడు ముందస్తు శిక్షణ పొందిన నమూనాలను ఉపయోగించండి. ఈ విధానం శిక్షణ వ్యవధిని తగ్గిస్తుంది మరియు మొత్తం నాణ్యతను పెంచుతుంది.",
+ "Extract Features": "ఎక్స్ ట్రాక్ట్ ఫీచర్లు",
+ "Start Training": "శిక్షణ ప్రారంభించండి",
+ "Generate Index": "ఇండెక్స్ జనరేట్ చేయండి",
+ "Voice Model": "వాయిస్ మోడల్",
+ "Select the voice model to use for the conversion.": "కన్వర్షన్ కొరకు ఉపయోగించాల్సిన వాయిస్ మోడల్ ని ఎంచుకోండి.",
+ "Index File": "ఇండెక్స్ ఫైల్",
+ "Select the index file to use for the conversion.": "మార్పిడి కొరకు ఉపయోగించాల్సిన ఇండెక్స్ ఫైల్ ని ఎంచుకోండి.",
+ "Refresh": "రిఫ్రెష్",
+ "Unload Voice": "వాయిస్ ను అన్ లోడ్ చేయండి",
+ "Single": "ఏక",
+ "Upload Audio": "ఆడియో అప్ లోడ్ చేయండి",
+ "Select Audio": "ఆడియోను ఎంచుకోండి",
+ "Select the audio to convert.": "కన్వర్ట్ చేయడానికి ఆడియోను ఎంచుకోండి.",
+ "Advanced Settings": "అధునాతన సెట్టింగ్ లు",
+ "Clear Outputs (Deletes all audios in assets/audios)": "క్లియర్ అవుట్ పుట్స్ (assets/audios లోని అన్ని ఆడియోలను తొలగిస్తుంది)",
+ "Custom Output Path": "కస్టమ్ అవుట్ పుట్ మార్గం",
+ "Output Path": "అవుట్ పుట్ మార్గం",
+ "The path where the output audio will be saved, by default in assets/audios/output.wav": "అవుట్ పుట్ ఆడియో సేవ్ చేయబడే మార్గం, డిఫాల్ట్ గా assets/audios/output.wav",
+ "Split Audio": "స్ప్లిట్ ఆడియో",
+ "Split the audio into chunks for inference to obtain better results in some cases.": "కొన్ని సందర్భాల్లో మెరుగైన ఫలితాలను పొందడానికి ఆడియోను భాగాలుగా విభజించండి.",
+ "Autotune": "Autotune",
+ "Apply a soft autotune to your inferences, recommended for singing conversions.": "పాటల మార్పిడి కోసం సిఫార్సు చేయబడిన మీ అంచనాలకు మృదువైన ఆటోట్యూన్ను వర్తించండి.",
+ "Clean Audio": "క్లీన్ ఆడియో",
+ "Clean your audio output using noise detection algorithms, recommended for speaking audios.": "నాయిస్ డిటెక్షన్ అల్గారిథమ్ లను ఉపయోగించి మీ ఆడియో అవుట్ పుట్ ను శుభ్రం చేయండి, మాట్లాడే ఆడియోలకు సిఫారసు చేయబడింది.",
+ "Clean Strength": "క్లీన్ స్ట్రెంత్",
+ "Set the clean-up level to the audio you want, the more you increase it the more it will clean up, but it is possible that the audio will be more compressed.": "మీకు కావలసిన ఆడియోకు క్లీన్-అప్ స్థాయిని సెట్ చేయండి, మీరు దానిని ఎంత పెంచితే అది మరింత క్లీన్ అవుతుంది, కానీ ఆడియో మరింత కంప్రెస్ అయ్యే అవకాశం ఉంది.",
+ "Pitch": "పిచ్",
+ "Set the pitch of the audio, the higher the value, the higher the pitch.": "ఆడియో యొక్క పిచ్ సెట్ చేయండి, విలువ ఎక్కువ, పిచ్ ఎక్కువ.",
+ "Filter Radius": "Filter వ్యాసార్థం",
+ "If the number is greater than or equal to three, employing median filtering on the collected tone results has the potential to decrease respiration.": "సంఖ్య మూడు కంటే ఎక్కువగా లేదా సమానంగా ఉంటే, సేకరించిన టోన్ ఫలితాలపై మధ్యస్థ వడపోతను ఉపయోగించడం శ్వాసక్రియను తగ్గించే సామర్థ్యాన్ని కలిగి ఉంటుంది.",
+ "Search Feature Ratio": "శోధన ఫీచర్ నిష్పత్తి",
+ "Influence exerted by the index file; a higher value corresponds to greater influence. However, opting for lower values can help mitigate artifacts present in the audio.": "ఇండెక్స్ ఫైలు ప్రభావం; అధిక విలువ ఎక్కువ ప్రభావానికి అనుగుణంగా ఉంటుంది. అయినప్పటికీ, తక్కువ విలువలను ఎంచుకోవడం ఆడియోలో ఉన్న కళాఖండాలను తగ్గించడానికి సహాయపడుతుంది.",
+ "Volume Envelope": "వాల్యూమ్ కవరు",
+ "Substitute or blend with the volume envelope of the output. The closer the ratio is to 1, the more the output envelope is employed.": "అవుట్ పుట్ యొక్క వాల్యూమ్ కవర్ తో ప్రత్యామ్నాయం చేయండి లేదా కలపండి. నిష్పత్తి 1 కి దగ్గరగా ఉంటే, అవుట్ పుట్ కవరు ఎక్కువగా ఉపయోగించబడుతుంది.",
+ "Protect Voiceless Consonants": "వాయిస్ లెస్ కన్సొనెంట్ లను సంరక్షించండి",
+ "Safeguard distinct consonants and breathing sounds to prevent electro-acoustic tearing and other artifacts. Pulling the parameter to its maximum value of 0.5 offers comprehensive protection. However, reducing this value might decrease the extent of protection while potentially mitigating the indexing effect.": "ఎలక్ట్రో-అకౌస్టిక్ చిరిగిపోవడం మరియు ఇతర కళాఖండాలను నిరోధించడానికి విభిన్న ధ్వనులు మరియు శ్వాస శబ్దాలను సంరక్షించండి. పరామీటర్ ను దాని గరిష్ట విలువ 0.5కు లాగడం సమగ్ర రక్షణను అందిస్తుంది. ఏదేమైనా, ఈ విలువను తగ్గించడం వల్ల ఇండెక్సింగ్ ప్రభావాన్ని తగ్గించేటప్పుడు రక్షణ పరిధి తగ్గుతుంది.",
+ "Pitch extraction algorithm": "పిచ్ వెలికితీత అల్గోరిథం",
+ "Pitch extraction algorithm to use for the audio conversion. The default algorithm is rmvpe, which is recommended for most cases.": "ఆడియో మార్పిడి కోసం ఉపయోగించే పిచ్ వెలికితీత అల్గోరిథం. డిఫాల్ట్ అల్గోరిథం ఆర్ఎమ్విపిఇ, ఇది చాలా సందర్భాలలో సిఫార్సు చేయబడింది.",
+ "Convert": "మార్చండి",
+ "Export Audio": "Export Audio",
+ "Batch": "బ్యాచ్",
+ "Input Folder": "ఇన్ పుట్ ఫోల్డర్",
+ "Select the folder containing the audios to convert.": "కన్వర్ట్ చేయడానికి ఆడియోలను కలిగి ఉన్న సంచికను ఎంచుకోండి.",
+ "Enter input path": "ఇన్ పుట్ మార్గాన్ని నమోదు చేయండి",
+ "Output Folder": "అవుట్ పుట్ ఫోల్డర్",
+ "Select the folder where the output audios will be saved.": "అవుట్ పుట్ ఆడియోలు సేవ్ చేయబడే ఫోల్డర్ ను ఎంచుకోండి.",
+ "Enter output path": "అవుట్ పుట్ మార్గాన్ని నమోదు చేయండి",
+ "Get information about the audio": "ఆడియో గురించి సమాచారం పొందండి",
+ "Information about the audio file": "ఆడియో ఫైలు గురించి సమాచారం",
+ "Waiting for information...": "సమాచారం కోసం ఎదురుచూస్తూ...",
+ "## Voice Blender": "## వాయిస్ బ్లెండర్",
+ "Select two voice models, set your desired blend percentage, and blend them into an entirely new voice.": "రెండు వాయిస్ మోడళ్లను ఎంచుకోండి, మీకు కావలసిన మిశ్రమ శాతాన్ని సెట్ చేయండి మరియు వాటిని పూర్తిగా కొత్త గొంతులో కలపండి.",
+ "Voice Blender": "వాయిస్ బ్లెండర్",
+ "Drag and drop your model here": "మీ మోడల్ ని ఇక్కడ డ్రాగ్ చేయండి మరియు డ్రాప్ చేయండి",
+ "You can also use a custom path.": "మీరు కస్టమ్ మార్గాన్ని కూడా ఉపయోగించవచ్చు.",
+ "Blend Ratio": "మిశ్రమ నిష్పత్తి",
+ "Adjusting the position more towards one side or the other will make the model more similar to the first or second.": "పొజిషన్ ను ఒక వైపు లేదా మరొక వైపుకు మరింత సర్దుబాటు చేయడం వల్ల మోడల్ మొదటి లేదా రెండవ వైపు మరింత పోలి ఉంటుంది.",
+ "Fusion": "ఫ్యూజన్",
+ "Path to Model": "మోడల్ కు మార్గం",
+ "Enter path to model": "మోడల్ కు మార్గాన్ని నమోదు చేయండి",
+ "Model information to be placed": "మోడల్ సమాచారం ఉంచాలి",
+ "Inroduce the model information": "మోడల్ సమాచారాన్ని పరిచయం చేయండి",
+ "The information to be placed in the model (You can leave it blank or put anything).": "మోడల్ లో ఉంచాల్సిన సమాచారం (మీరు దానిని ఖాళీగా ఉంచవచ్చు లేదా ఏదైనా ఉంచవచ్చు).",
+ "View model information": "నమూనా సమాచారాన్ని వీక్షించండి",
+ "Introduce the model pth path": "మోడల్ పిటిహెచ్ మార్గాన్ని పరిచయం చేయండి",
+ "View": "వీక్షించండి",
+ "Model extraction": "నమూనా వెలికితీత",
+ "Model conversion": "నమూనా మార్పిడి",
+ "Pth file": "PTH ఫైల్",
+ "Output of the pth file": "పిటిహెచ్ ఫైల్ యొక్క అవుట్ పుట్",
+ "# How to Report an Issue on GitHub": "# గిట్హబ్లో సమస్యను ఎలా రిపోర్ట్ చేయాలి",
+ "1. Click on the 'Record Screen' button below to start recording the issue you are experiencing.": "1. మీరు ఎదుర్కొంటున్న సమస్యను రికార్డ్ చేయడం ప్రారంభించడానికి దిగువ 'రికార్డ్ స్క్రీన్' బటన్పై క్లిక్ చేయండి.",
+ "2. Once you have finished recording the issue, click on the 'Stop Recording' button (the same button, but the label changes depending on whether you are actively recording or not).": "2. మీరు సమస్యను రికార్డ్ చేయడం పూర్తి చేసిన తర్వాత, 'స్టాప్ రికార్డింగ్' బటన్పై క్లిక్ చేయండి (అదే బటన్, కానీ మీరు చురుకుగా రికార్డ్ చేస్తున్నారా లేదా అనే దానిపై ఆధారపడి లేబుల్ మారుతుంది).",
+ "3. Go to [GitHub Issues](https://github.com/IAHispano/Applio/issues) and click on the 'New Issue' button.": "3. 'గిట్హబ్ ఇష్యూస్'(https://github.com/IAHispano/Applio/issues)లోకి వెళ్లి 'న్యూ ఇష్యూ' బటన్పై క్లిక్ చేయాలి.",
+ "4. Complete the provided issue template, ensuring to include details as needed, and utilize the assets section to upload the recorded file from the previous step.": "4. ఇచ్చిన ఇష్యూ టెంప్లేట్ ను పూర్తి చేసి, అవసరమైన విధంగా వివరాలను పొందుపర్చేలా చూసుకోవాలి మరియు మునుపటి దశ నుండి రికార్డ్ చేయబడిన ఫైల్ ను అప్ లోడ్ చేయడానికి ఆస్తుల విభాగాన్ని ఉపయోగించండి.",
+ "Record Screen": "స్క్రీన్ రికార్డ్ చేయండి",
+ "Record": "నమోదు",
+ "Stop Recording": "రికార్డింగ్ ఆపండి",
+ "Introduce the model .pth path": "మోడల్ .pth మార్గాన్ని పరిచయం చేయండి",
+ "See Model Information": "మోడల్ సమాచారం చూడండి",
+ "## Download Model": "## డౌన్ లోడ్ మోడల్",
+ "Model Link": "మోడల్ లింక్",
+ "Introduce the model link": "మోడల్ లింక్ ను పరిచయం చేయండి",
+ "Download Model": "మోడల్ డౌన్ లోడ్ చేయండి",
+ "## Drop files": "## డ్రాప్ ఫైళ్లు",
+ "Drag your .pth file and .index file into this space. Drag one and then the other.": "మీ .pth ఫైల్ మరియు .index ఫైల్ ని ఈ స్పేస్ లోకి లాగండి. ఒకదాన్ని లాగి, తర్వాత మరొకటి లాగండి.",
+ "TTS Voices": "టిటిఎస్ వాయిస్",
+ "Select the TTS voice to use for the conversion.": "కన్వర్షన్ కొరకు ఉపయోగించడానికి TTS వాయిస్ ని ఎంచుకోండి.",
+ "Text to Synthesize": "సంశ్లేషణ చేయడానికి టెక్స్ట్",
+ "Enter the text to synthesize.": "సంశ్లేషణ చేయడానికి టెక్స్ట్ ను నమోదు చేయండి.",
+ "Or you can upload a .txt file": "లేదా .txt ఫైల్ ను అప్ లోడ్ చేయవచ్చు.",
+ "Enter text to synthesize": "సంశ్లేషణ చేయడానికి టెక్స్ట్ ను నమోదు చేయండి",
+ "Output Path for TTS Audio": "TTS ఆడియో కొరకు అవుట్ పుట్ మార్గం",
+ "Output Path for RVC Audio": "RVC ఆడియో కొరకు అవుట్ పుట్ మార్గం",
+ "Enable Applio integration with Discord presence": "Discord ప్రెజెన్స్ తో Applio ఇంటిగ్రేషన్ ను ప్రారంభించండి",
+ "It will activate the possibility of displaying the current Applio activity in Discord.": "ఇది డిస్కార్డ్ లో ప్రస్తుత అప్లైయో యాక్టివిటీని ప్రదర్శించే అవకాశాన్ని యాక్టివేట్ చేస్తుంది.",
+ "Enable Applio integration with applio.org/models using flask": "ఫ్లాస్క్ ను ఉపయోగించి applio.org/models తో Applio ఇంటిగ్రేషన్ ను ప్రారంభించండి",
+ "It will activate the possibility of downloading models with a click from the website.": "వెబ్ సైట్ నుంచి క్లిక్ తో మోడళ్లను డౌన్ లోడ్ చేసుకునే సదుపాయాన్ని యాక్టివేట్ చేస్తుంది.",
+ "Theme": "థీమ్",
+ "Select the theme you want to use. (Requires restarting Applio)": "మీరు ఉపయోగించాలనుకుంటున్న థీమ్ ఎంచుకోండి. (అప్లికేషన్ ను పునఃప్రారంభించడం అవసరం)",
+ "Language": "భాష",
+ "Select the language you want to use. (Requires restarting Applio)": "మీరు ఉపయోగించాలనుకుంటున్న భాషను ఎంచుకోండి. (అప్లికేషన్ ను పునఃప్రారంభించడం అవసరం)",
+ "Plugin Installer": "Plugin Installer",
+ "Drag your plugin.zip to install it": "దీన్ని ఇన్ స్టాల్ చేయడానికి మీ plugin.zip లాగండి",
+ "Version Checker": "Version Checker",
+ "Check which version of Applio is the latest to see if you need to update.": "అప్లియో యొక్క ఏ వెర్షన్ లేటెస్ట్ అని చెక్ చేయండి, మీరు అప్ డేట్ చేయాల్సిన అవసరం ఉందా అని చూడండి.",
+ "Check for updates": "అప్ డేట్ ల కొరకు చెక్ చేయండి"
+}
\ No newline at end of file
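Each of these language files has the same shape: every key is the literal English source string used in the UI, and every value is its translation. As a rough illustration of how such a file can be consumed, here is a minimal sketch of a lookup helper; the class name `SimpleI18n` and its fallback behavior are illustrative assumptions, not the repository's actual loader:

```python
import json
from pathlib import Path


class SimpleI18n:
    """Look up UI strings by their English source text."""

    def __init__(self, language_file: Path) -> None:
        with open(language_file, encoding="utf-8") as f:
            # Keys are English source strings, values are translations.
            self.strings: dict[str, str] = json.load(f)

    def __call__(self, key: str) -> str:
        # An untranslated key falls back to the English text itself.
        return self.strings.get(key, key)


if __name__ == "__main__":
    i18n = SimpleI18n(Path("assets/i18n/languages/te_TE.json"))
    print(i18n("Output Information"))      # translated value from the file
    print(i18n("Some key with no entry"))  # falls back to the English text
```

Under this scheme a value left in English (such as "Plugins": "Plugins" above) simply renders unchanged, so partially translated files degrade gracefully.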
diff --git a/assets/i18n/languages/th_TH.json b/assets/i18n/languages/th_TH.json
new file mode 100644
index 0000000000000000000000000000000000000000..ba5ea4f11d2ffc331f0f8d455021abf5403eb07d
--- /dev/null
+++ b/assets/i18n/languages/th_TH.json
@@ -0,0 +1,175 @@
+{
+ "VITS-based Voice Conversion focused on simplicity, quality and performance.": "สุดยอดเครื่องมือโคลนเสียงที่ได้รับการปรับให้เหมาะสมอย่างพิถีพิถันเพื่อพลังที่ไม่มีใครเทียบได้ความเป็นโมดูลและประสบการณ์ที่ใช้งานง่าย",
+ "This section contains some extra utilities that often may be in experimental phases.": "ส่วนนี้ประกอบด้วยยูทิลิตี้พิเศษบางอย่างที่มักจะอยู่ในขั้นตอนการทดลอง",
+ "Output Information": "ข้อมูลเอาต์พุต",
+ "The output information will be displayed here.": "ข้อมูลผลลัพธ์จะแสดงที่นี่",
+ "Inference": "การอนุมาน",
+ "Train": "ฝึกอบรม",
+ "Extra": "พิเศษ",
+ "Merge Audios": "รวมไฟล์เสียง",
+ "Processing": "ประมวลผล",
+ "Audio Analyzer": "เครื่องวิเคราะห์เสียง",
+ "Model Information": "ข้อมูลรุ่น",
+ "Plugins": "ปลั๊กอิน",
+ "Download": "ดาวน์โหลด",
+ "Report a Bug": "รายงานข้อบกพร่อง",
+ "Settings": "การตั้งค่า",
+ "Preprocess": "กระบวนการล่วงหน้า",
+ "Model Name": "ชื่อรุ่น",
+ "Name of the new model.": "ชื่อของรุ่นใหม่",
+ "Enter model name": "ใส่ชื่อรุ่น",
+ "Dataset Path": "เส้นทางชุดข้อมูล",
+ "Path to the dataset folder.": "เส้นทางไปยังโฟลเดอร์ชุดข้อมูล",
+ "Refresh Datasets": "รีเฟรชชุดข้อมูล",
+ "Dataset Creator": "ผู้สร้างชุดข้อมูล",
+ "Dataset Name": "ชื่อชุดข้อมูล",
+ "Name of the new dataset.": "ชื่อของชุดข้อมูลใหม่",
+ "Enter dataset name": "ป้อนชื่อชุดข้อมูล",
+ "Upload Audio Dataset": "อัปโหลดชุดข้อมูลเสียง",
+ "The audio file has been successfully added to the dataset. Please click the preprocess button.": "เพิ่มไฟล์เสียงลงในชุดข้อมูลเรียบร้อยแล้ว โปรดคลิกปุ่มดําเนินการล่วงหน้า",
+ "Enter dataset path": "ป้อนเส้นทางชุดข้อมูล",
+ "Sampling Rate": "อัตราการสุ่มตัวอย่าง",
+ "The sampling rate of the audio files.": "อัตราการสุ่มตัวอย่างของไฟล์เสียง",
+ "Model Architecture": "รุ่น RVC",
+ "Version of the model architecture.": "รุ่น RVC ของรุ่น",
+ "Preprocess Dataset": "ชุดข้อมูลประมวลผลล่วงหน้า",
+ "Extract": "สกัด",
+ "Hop Length": "ความยาวกระโดด",
+ "Denotes the duration it takes for the system to transition to a significant pitch change. Smaller hop lengths require more time for inference but tend to yield higher pitch accuracy.": "หมายถึงระยะเวลาที่ระบบใช้ในการเปลี่ยนไปใช้การเปลี่ยนแปลงระดับเสียงที่สําคัญ ความยาวฮ็อพที่เล็กกว่าต้องใช้เวลามากขึ้นในการอนุมาน แต่มักจะให้ความแม่นยําของพิทช์ที่สูงขึ้น",
+ "Batch Size": "ขนาดแบทช์",
+ "It's advisable to align it with the available VRAM of your GPU. A setting of 4 offers improved accuracy but slower processing, while 8 provides faster and standard results.": "ขอแนะนําให้จัดตําแหน่งให้ตรงกับ VRAM ที่มีอยู่ของ GPU ของคุณ การตั้งค่า 4 ให้ความแม่นยําที่ดีขึ้น แต่การประมวลผลช้าลง ในขณะที่ 8 ให้ผลลัพธ์ที่เร็วขึ้นและเป็นมาตรฐาน",
+ "Save Every Epoch": "บันทึกทุกยุค",
+ "Determine at how many epochs the model will saved at.": "กําหนดจํานวนยุคที่โมเดลจะบันทึกไว้",
+ "Total Epoch": "ยุคทั้งหมด",
+ "Specifies the overall quantity of epochs for the model training process.": "ระบุปริมาณโดยรวมของยุคสําหรับกระบวนการฝึกอบรมแบบจําลอง",
+ "Pretrained": "ได้รับการฝึกฝนล่วงหน้า",
+ "Save Only Latest": "บันทึกเฉพาะล่าสุด",
+ "Enabling this setting will result in the G and D files saving only their most recent versions, effectively conserving storage space.": "การเปิดใช้งานการตั้งค่านี้จะส่งผลให้ไฟล์ G และ D บันทึกเฉพาะเวอร์ชันล่าสุด ซึ่งช่วยประหยัดพื้นที่จัดเก็บได้อย่างมีประสิทธิภาพ",
+ "Save Every Weights": "บันทึกน้ําหนักทั้งหมด",
+ "This setting enables you to save the weights of the model at the conclusion of each epoch.": "การตั้งค่านี้ช่วยให้คุณบันทึกน้ําหนักของโมเดลเมื่อสิ้นสุดแต่ละยุค",
+ "Custom Pretrained": "กําหนดเอง Pretrained",
+ "Utilizing custom pretrained models can lead to superior results, as selecting the most suitable pretrained models tailored to the specific use case can significantly enhance performance.": "การใช้โมเดลที่ผ่านการฝึกอบรมล่วงหน้าแบบกําหนดเองสามารถนําไปสู่ผลลัพธ์ที่เหนือกว่า เนื่องจากการเลือกโมเดลที่ผ่านการฝึกอบรมล่วงหน้าที่เหมาะสมที่สุดซึ่งปรับให้เหมาะกับกรณีการใช้งานเฉพาะสามารถเพิ่มประสิทธิภาพได้อย่างมาก",
+ "Upload Pretrained Model": "อัปโหลดโมเดลที่ผ่านการฝึกอบรม",
+ "Refresh Custom Pretraineds": "รีเฟรช Custom Pretraineds",
+ "Pretrained Custom Settings": "การตั้งค่าแบบกําหนดเองที่ผ่านการฝึกอบรมล่วงหน้า",
+ "The file you dropped is not a valid pretrained file. Please try again.": "ไฟล์ที่คุณทิ้งไม่ใช่ไฟล์ที่ผ่านการฝึกอบรมล่วงหน้าที่ถูกต้อง โปรดลองอีกครั้ง",
+ "Click the refresh button to see the pretrained file in the dropdown menu.": "คลิกปุ่มรีเฟรชเพื่อดูไฟล์ที่ฝึกไว้ล่วงหน้าในเมนูแบบเลื่อนลง",
+ "Pretrained G Path": "Custom Pretrained G",
+ "Pretrained D Path": "กําหนดเอง Pretrained D",
+ "GPU Settings": "การตั้งค่า GPU",
+ "Sets advanced GPU settings, recommended for users with better GPU architecture.": "ตั้งค่า GPU ขั้นสูง แนะนําสําหรับผู้ใช้ที่มีสถาปัตยกรรม GPU ที่ดีกว่า",
+ "GPU Custom Settings": "การตั้งค่าแบบกําหนดเองของ GPU",
+ "GPU Number": "หมายเลข GPU",
+ "0 to ∞ separated by -": "0 ถึง ∞ คั่นด้วย -",
+ "GPU Information": "ข้อมูล GPU",
+ "Pitch Guidance": "คําแนะนําระดับเสียง",
+ "By employing pitch guidance, it becomes feasible to mirror the intonation of the original voice, including its pitch. This feature is particularly valuable for singing and other scenarios where preserving the original melody or pitch pattern is essential.": "ด้วยการใช้คําแนะนําระดับเสียง จึงเป็นไปได้ที่จะสะท้อนน้ําเสียงของเสียงต้นฉบับ รวมถึงระดับเสียงด้วย คุณลักษณะนี้มีประโยชน์อย่างยิ่งสําหรับการร้องเพลงและสถานการณ์อื่นๆ ที่การรักษาท่วงทํานองดั้งเดิมหรือรูปแบบระดับเสียงเป็นสิ่งสําคัญ",
+ "Utilize pretrained models when training your own. This approach reduces training duration and enhances overall quality.": "ใช้โมเดลที่ผ่านการฝึกอบรมล่วงหน้าเมื่อฝึกของคุณเอง วิธีการนี้ช่วยลดระยะเวลาการฝึกอบรมและปรับปรุงคุณภาพโดยรวม",
+ "Extract Features": "คุณสมบัติสารสกัด",
+ "Start Training": "เริ่มการฝึกอบรม",
+ "Generate Index": "สร้างดัชนี",
+ "Voice Model": "รูปแบบเสียง",
+ "Select the voice model to use for the conversion.": "เลือกรูปแบบเสียงที่จะใช้สําหรับการแปลง",
+ "Index File": "ไฟล์ดัชนี",
+ "Select the index file to use for the conversion.": "เลือกไฟล์ดัชนีที่จะใช้สําหรับการแปลง",
+ "Refresh": "รีเฟรช",
+ "Unload Voice": "ยกเลิกการโหลดเสียง",
+ "Single": "เดี่ยว",
+ "Upload Audio": "อัพโหลดเสียง",
+ "Select Audio": "เลือกเสียง",
+ "Select the audio to convert.": "เลือกเสียงที่จะแปลง",
+ "Advanced Settings": "การตั้งค่าขั้นสูง",
+ "Clear Outputs (Deletes all audios in assets/audios)": "Clear Outputs (ลบไฟล์เสียงทั้งหมดใน assets/audios)",
+ "Custom Output Path": "เส้นทางเอาต์พุตที่กําหนดเอง",
+ "Output Path": "เส้นทางเอาต์พุต",
+ "The path where the output audio will be saved, by default in assets/audios/output.wav": "เส้นทางที่จะบันทึกเสียงเอาต์พุต โดยค่าเริ่มต้นใน assets/audios/output.wav",
+ "Split Audio": "แยกเสียง",
+ "Split the audio into chunks for inference to obtain better results in some cases.": "แบ่งเสียงออกเป็นชิ้นๆ เพื่อการอนุมานเพื่อให้ได้ผลลัพธ์ที่ดีขึ้นในบางกรณี",
+ "Autotune": "ออโต้จูน",
+ "Apply a soft autotune to your inferences, recommended for singing conversions.": "ใช้ซอฟต์ออโต้จูนกับการอนุมานของคุณ ซึ่งแนะนําสําหรับการแปลงการร้องเพลง",
+ "Clean Audio": "ทําความสะอาดเสียง",
+ "Clean your audio output using noise detection algorithms, recommended for speaking audios.": "ทําความสะอาดเอาต์พุตเสียงของคุณโดยใช้อัลกอริธึมการตรวจจับเสียงรบกวน ซึ่งแนะนําสําหรับการพูดไฟล์เสียง",
+ "Clean Strength": "ความแข็งแรงสะอาด",
+ "Set the clean-up level to the audio you want, the more you increase it the more it will clean up, but it is possible that the audio will be more compressed.": "ตั้งค่าระดับการล้างข้อมูลเป็นเสียงที่คุณต้องการยิ่งคุณเพิ่มมากเท่าไหร่ก็ยิ่งทําความสะอาดได้มากขึ้นเท่านั้น แต่เป็นไปได้ว่าเสียงจะถูกบีบอัดมากขึ้น",
+ "Pitch": "ระดับเสียง",
+ "Set the pitch of the audio, the higher the value, the higher the pitch.": "ตั้งค่าระดับเสียง ยิ่งค่าสูง ระดับเสียงก็จะยิ่งสูงขึ้น",
+ "Filter Radius": "รัศมีตัวกรอง",
+ "If the number is greater than or equal to three, employing median filtering on the collected tone results has the potential to decrease respiration.": "หากจํานวนมากกว่าหรือเท่ากับสามการใช้การกรองค่ามัธยฐานกับผลลัพธ์ของโทนเสียงที่รวบรวมได้มีศักยภาพในการลดการหายใจ",
+ "Search Feature Ratio": "อัตราส่วนคุณลักษณะการค้นหา",
+ "Influence exerted by the index file; a higher value corresponds to greater influence. However, opting for lower values can help mitigate artifacts present in the audio.": "อิทธิพลที่เกิดจากไฟล์ดัชนี ค่าที่สูงขึ้นสอดคล้องกับอิทธิพลที่มากขึ้น อย่างไรก็ตาม การเลือกใช้ค่าที่ต่ํากว่าสามารถช่วยบรรเทาสิ่งประดิษฐ์ที่มีอยู่ในเสียงได้",
+ "Volume Envelope": "ซองปริมาตร",
+ "Substitute or blend with the volume envelope of the output. The closer the ratio is to 1, the more the output envelope is employed.": "แทนที่หรือผสมกับซองปริมาตรของเอาต์พุต ยิ่งอัตราส่วนใกล้เคียงกับ 1 มากเท่าไหร่ก็ยิ่งใช้ซองเอาต์พุตมากขึ้นเท่านั้น",
+ "Protect Voiceless Consonants": "ปกป้องพยัญชนะไร้เสียง",
+ "Safeguard distinct consonants and breathing sounds to prevent electro-acoustic tearing and other artifacts. Pulling the parameter to its maximum value of 0.5 offers comprehensive protection. However, reducing this value might decrease the extent of protection while potentially mitigating the indexing effect.": "ปกป้องพยัญชนะและเสียงหายใจที่แตกต่างกันเพื่อป้องกันการฉีกขาดของอิเล็กโทรอะคูสติกและสิ่งประดิษฐ์อื่นๆ การดึงพารามิเตอร์ไปที่ค่าสูงสุด 0.5 ให้การป้องกันที่ครอบคลุม อย่างไรก็ตาม การลดค่านี้อาจลดขอบเขตของการป้องกันในขณะที่อาจบรรเทาผลกระทบจากการจัดทําดัชนี",
+ "Pitch extraction algorithm": "อัลกอริธึมการแยกระดับเสียง",
+ "Pitch extraction algorithm to use for the audio conversion. The default algorithm is rmvpe, which is recommended for most cases.": "อัลกอริธึมการแยกระดับเสียงเพื่อใช้สําหรับการแปลงเสียง อัลกอริทึมเริ่มต้นคือ rmvpe ซึ่งแนะนําสําหรับกรณีส่วนใหญ่",
+ "Convert": "แปลง",
+ "Export Audio": "ส่งออกเสียง",
+ "Batch": "ชุด",
+ "Input Folder": "โฟลเดอร์อินพุต",
+ "Select the folder containing the audios to convert.": "เลือกโฟลเดอร์ที่มีไฟล์เสียงที่จะแปลง",
+ "Enter input path": "ป้อนเส้นทางอินพุต",
+ "Output Folder": "โฟลเดอร์ผลลัพธ์",
+ "Select the folder where the output audios will be saved.": "เลือกโฟลเดอร์ที่จะบันทึกไฟล์เสียงที่ส่งออก",
+ "Enter output path": "ป้อนเส้นทางเอาต์พุต",
+ "Get information about the audio": "รับข้อมูลเกี่ยวกับเสียง",
+ "Information about the audio file": "ข้อมูลเกี่ยวกับไฟล์เสียง",
+ "Waiting for information...": "รอข้อมูล...",
+ "## Voice Blender": "## เครื่องปั่นเสียง",
+ "Select two voice models, set your desired blend percentage, and blend them into an entirely new voice.": "เลือกรุ่นเสียงสองแบบตั้งค่าเปอร์เซ็นต์การผสมที่คุณต้องการและผสมผสานเป็นเสียงใหม่ทั้งหมด",
+ "Voice Blender": "เครื่องปั่นเสียง",
+ "Drag and drop your model here": "ลากและวางโมเดลของคุณที่นี่",
+ "You can also use a custom path.": "คุณยังสามารถใช้เส้นทางที่กําหนดเอง",
+ "Blend Ratio": "อัตราส่วนการผสม",
+ "Adjusting the position more towards one side or the other will make the model more similar to the first or second.": "การปรับตําแหน่งไปทางด้านใดด้านหนึ่งมากขึ้นจะทําให้โมเดลคล้ายกับครั้งแรกหรือครั้งที่สองมากขึ้น",
+ "Fusion": "ฟิวชั่น",
+ "Path to Model": "เส้นทางสู่โมเดล",
+ "Enter path to model": "ป้อนเส้นทางไปยังแบบจําลอง",
+ "Model information to be placed": "ข้อมูลรุ่นที่จะวาง",
+ "Inroduce the model information": "ให้ข้อมูลโมเดล",
+ "The information to be placed in the model (You can leave it blank or put anything).": "ข้อมูลที่จะวางในแบบจําลอง (คุณสามารถเว้นว่างไว้หรือใส่อะไรก็ได้)",
+ "View model information": "ดูข้อมูลรุ่น",
+ "Introduce the model pth path": "แนะนําเส้นทาง pth ของโมเดล",
+ "View": "ดู",
+ "Model extraction": "การสกัดแบบจําลอง",
+ "Model conversion": "การแปลงโมเดล",
+ "Pth file": "ไฟล์ Pth",
+ "Output of the pth file": "เอาต์พุตของไฟล์ pth",
+ "# How to Report an Issue on GitHub": "# วิธีรายงานปัญหาใน GitHub",
+ "1. Click on the 'Record Screen' button below to start recording the issue you are experiencing.": "1. คลิกที่ปุ่ม 'บันทึกหน้าจอ' ด้านล่างเพื่อเริ่มบันทึกปัญหาที่คุณพบ",
+ "2. Once you have finished recording the issue, click on the 'Stop Recording' button (the same button, but the label changes depending on whether you are actively recording or not).": "2. เมื่อคุณบันทึกปัญหาเสร็จแล้ว ให้คลิกที่ปุ่ม 'หยุดการบันทึก' (ปุ่มเดิม แต่ป้ายกํากับจะเปลี่ยนขึ้นอยู่กับว่าคุณกําลังบันทึกอยู่หรือไม่)",
+ "3. Go to [GitHub Issues](https://github.com/IAHispano/Applio/issues) and click on the 'New Issue' button.": "3. ไปที่ [GitHub Issues](https://github.com/IAHispano/Applio/issues) และคลิกที่ปุ่ม 'New Issue'",
+ "4. Complete the provided issue template, ensuring to include details as needed, and utilize the assets section to upload the recorded file from the previous step.": "4. กรอกเทมเพลตปัญหาที่ให้มา เพื่อให้แน่ใจว่ามีรายละเอียดตามต้องการ และใช้ส่วนเนื้อหาเพื่ออัปโหลดไฟล์ที่บันทึกไว้จากขั้นตอนก่อนหน้า",
+ "Record Screen": "หน้าจอบันทึก",
+ "Record": "บันทึก",
+ "Stop Recording": "หยุดการบันทึก",
+ "Introduce the model .pth path": "แนะนําโมเดล .pth path",
+ "See Model Information": "ดูข้อมูลรุ่น",
+ "## Download Model": "## ดาวน์โหลดโมเดล",
+ "Model Link": "ลิงค์โมเดล",
+ "Introduce the model link": "แนะนําลิงค์โมเดล",
+ "Download Model": "ดาวน์โหลดโมเดล",
+ "## Drop files": "## วางไฟล์",
+ "Drag your .pth file and .index file into this space. Drag one and then the other.": "ลากไฟล์ .pth และไฟล์ .index ลงในช่องว่างนี้ ลากหนึ่งแล้วลากอีกอันหนึ่ง",
+ "TTS Voices": "เสียง TTS",
+ "Select the TTS voice to use for the conversion.": "เลือกเสียง TTS ที่จะใช้สําหรับการแปลง",
+ "Text to Synthesize": "ข้อความที่จะสังเคราะห์",
+ "Enter the text to synthesize.": "ป้อนข้อความที่จะสังเคราะห์",
+ "Or you can upload a .txt file": "หรือคุณสามารถอัปโหลดไฟล์ .txt",
+ "Enter text to synthesize": "ป้อนข้อความที่จะสังเคราะห์",
+ "Output Path for TTS Audio": "เส้นทางเอาต์พุตสําหรับเสียง TTS",
+ "Output Path for RVC Audio": "เส้นทางเอาต์พุตสําหรับเสียง RVC",
+ "Enable Applio integration with Discord presence": "เปิดใช้งานการรวม Applio กับการแสดงตนของ Discord",
+ "It will activate the possibility of displaying the current Applio activity in Discord.": "มันจะเปิดใช้งานความเป็นไปได้ในการแสดงกิจกรรม Applio ปัจจุบันใน Discord",
+ "Enable Applio integration with applio.org/models using flask": "เปิดใช้งานการรวม Applio กับ applio.org/models โดยใช้ Flask",
+ "It will activate the possibility of downloading models with a click from the website.": "มันจะเปิดใช้งานความเป็นไปได้ในการดาวน์โหลดโมเดลด้วยการคลิกจากเว็บไซต์",
+ "Theme": "ธีม",
+ "Select the theme you want to use. (Requires restarting Applio)": "เลือกธีมที่คุณต้องการใช้ (ต้องรีสตาร์ท Applio)",
+ "Language": "ภาษา",
+ "Select the language you want to use. (Requires restarting Applio)": "เลือกภาษาที่คุณต้องการใช้ (ต้องรีสตาร์ท Applio)",
+ "Plugin Installer": "ตัวติดตั้งปลั๊กอิน",
+ "Drag your plugin.zip to install it": "ลาก plugin.zip ของคุณเพื่อติดตั้ง",
+ "Version Checker": "ตัวตรวจสอบเวอร์ชัน",
+ "Check which version of Applio is the latest to see if you need to update.": "ตรวจสอบว่า Applio เวอร์ชันใดเป็นเวอร์ชันล่าสุดเพื่อดูว่าคุณจําเป็นต้องอัปเดตหรือไม่",
+ "Check for updates": "ตรวจหาการอัปเดต"
+}
\ No newline at end of file
diff --git a/assets/i18n/languages/tr_TR.json b/assets/i18n/languages/tr_TR.json
new file mode 100644
index 0000000000000000000000000000000000000000..614de44ba850dfd997ebf31df2788d34300743fb
--- /dev/null
+++ b/assets/i18n/languages/tr_TR.json
@@ -0,0 +1,215 @@
+{
+ "VITS-based Voice Conversion focused on simplicity, quality and performance.": "Üstün ses kopyalama aracı, rakipsiz güç, modülerlik ve kullanıcı dostu deneyim için özenle optimize edilmiştir.\n[Türkçe çeviri: Enes](https://discord.com/users/1140031358006202468)",
+ "This section contains some extra utilities that often may be in experimental phases.": "Bu bölümde, genellikle deneysel aşamalarda olabilecek bazı ekstra yardımcı programlar bulunur.",
+ "Output Information": "Çıktı Bilgileri",
+ "The output information will be displayed here.": "Çıktı bilgileri burada görüntülenecektir.",
+
+ "Inference": "Çıkarım",
+ "Train": "Eğitim",
+ "Extra": "Ekstra",
+ "Merge Audios": "Sesleri Birleştir",
+ "Processing": "İşleme",
+ "Audio Analyzer": "Ses Analizörü",
+ "Model Information": "Model Bilgileri",
+ "Plugins": "Eklentiler",
+ "Download": "İndir",
+ "Report a Bug": "Bir Hata Bildir",
+ "Settings": "Ayarlar",
+
+ "Preprocess": "İşlem Öncesi",
+ "Model Name": "Model Adı",
+ "Name of the new model.": "Yeni modelin adı.",
+ "Enter model name": "Model adını girin",
+ "Dataset Path": "Veri Kümesi Yolu",
+ "Path to the dataset folder.": "Veri kümesi klasörünün yolu.",
+ "Refresh Datasets": "Veri Kümelerini Yenile",
+ "Dataset Creator": "Veri Kümesi Oluşturucu",
+ "Dataset Name": "Veri Kümesi Adı",
+ "Name of the new dataset.": "Yeni veri kümesinin adı.",
+ "Enter dataset name": "Veri kümesi adını girin",
+ "Upload Audio Dataset": "Ses Veri Kümesini Yükle",
+ "The audio file has been successfully added to the dataset. Please click the preprocess button.": "Ses dosyası veri kümesine başarıyla eklendi. Lütfen işlem öncesi düğmesine tıklayın.",
+ "Enter dataset path": "Veri kümesi yolunu girin",
+ "Sampling Rate": "Örnekleme Hızı",
+ "The sampling rate of the audio files.": "Ses dosyalarının örnekleme hızı.",
+ "Model Architecture": "RVC Sürümü",
+ "Version of the model architecture.": "Modelin RVC sürümü.",
+ "Preprocess Dataset": "Veri Kümesini İşlem Öncesinden Geçir",
+
+ "Embedder Model": "Gömme Modeli",
+ "Model used for learning speaker embedding.": "Konuşmacı gömmelerini öğrenmek için kullanılan model.",
+ "Extract": "Çıkar",
+ "Hop Length": "Adım Uzunluğu",
+ "Denotes the duration it takes for the system to transition to a significant pitch change. Smaller hop lengths require more time for inference but tend to yield higher pitch accuracy.": "Sistemin önemli bir perde değişikliğine geçmesi için gereken süreyi belirtir. Daha küçük adım uzunlukları, çıkarsama için daha fazla zaman gerektirir ancak daha yüksek perde doğruluğu sağlar.",
+ "Batch Size": "Toplu Boyutu",
+ "It's advisable to align it with the available VRAM of your GPU. A setting of 4 offers improved accuracy but slower processing, while 8 provides faster and standard results.": "Bunu GPU'nuzun mevcut VRAM'iyle uyumlu hale getirmeniz önerilir. 4'lük bir ayar daha iyi doğruluk ancak daha yavaş işleme sunarken, 8 daha hızlı ve standart sonuçlar sağlar.",
+ "Save Every Epoch": "Her Dönemde Kaydet",
+ "Determine at how many epochs the model will saved at.": "Modelin kaçıncı çağlarda kaydedileceğini belirleyin.",
+ "Total Epoch": "Toplam Çağ",
+ "Specifies the overall quantity of epochs for the model training process.": "Model eğitim süreci için gereken toplam dönem sayısını belirtir.",
+ "Pretrained": "Önceden Eğitimli",
+ "Save Only Latest": "Sadece Son Sürümü Kaydet",
+ "Enabling this setting will result in the G and D files saving only their most recent versions, effectively conserving storage space.": "Bu ayarın etkinleştirilmesi, G ve D dosyalarının yalnızca en son sürümlerinin kaydedilmesine neden olur ve depolama alanından tasarruf sağlar.",
+ "Save Every Weights": "Tüm Ağırlıkları Kaydet",
+ "This setting enables you to save the weights of the model at the conclusion of each epoch.": "Bu ayar, her dönemin sonunda modelin ağırlıklarını kaydetmenizi sağlar.",
+ "Custom Pretrained": "Özel Önceden Eğitimli",
+ "Utilizing custom pretrained models can lead to superior results, as selecting the most suitable pretrained models tailored to the specific use case can significantly enhance performance.": "Özel önceden eğitilmiş modelleri kullanmak üstün sonuçlar doğurabilir, çünkü belirli kullanım durumuna göre uyarlanmış en uygun önceden eğitilmiş modelleri seçmek performansı önemli ölçüde artırabilir.",
+ "Upload Pretrained Model": "Önceden Eğitimli Model Yükle",
+ "Refresh Custom Pretraineds": "Özel Önceden Eğitimleri Yenile",
+ "Pretrained Custom Settings": "Önceden Eğitimli Özel Ayarlar",
+ "The file you dropped is not a valid pretrained file. Please try again.": "Bıraktığınız dosya geçerli bir önceden eğitilmiş dosya değil. Lütfen tekrar deneyin.",
+ "Click the refresh button to see the pretrained file in the dropdown menu.": "Açılır menüde önceden eğitilmiş dosyayı görmek için yenile düğmesine tıklayın.",
+ "Pretrained G Path": "Özel Önceden Eğitimli G",
+ "Pretrained D Path": "Özel Önceden Eğitimli D",
+ "GPU Settings": "GPU Ayarları",
+ "Sets advanced GPU settings, recommended for users with better GPU architecture.": "Gelişmiş GPU ayarlarını belirler, daha iyi GPU mimarisine sahip kullanıcılar için önerilir.",
+ "GPU Custom Settings": "GPU Özel Ayarları",
+ "GPU Number": "GPU Numarası",
+ "0 to ∞ separated by -": "0'dan sonsuza kadar - ile ayrılmış",
+ "GPU Information": "GPU Bilgileri",
+ "Pitch Guidance": "Perde Rehberliği",
+ "By employing pitch guidance, it becomes feasible to mirror the intonation of the original voice, including its pitch. This feature is particularly valuable for singing and other scenarios where preserving the original melody or pitch pattern is essential.": "Perde rehberliğini kullanarak, orijinal sesin tonlamasını, perde de dahil olmak üzere yansıtmak mümkün hale gelir. Bu özellik, özellikle orijinal melodiyi veya perde düzenini korumak için hayati önem taşıyan şarkı söyleme ve diğer senaryolar için değerlidir.",
+ "Utilize pretrained models when training your own. This approach reduces training duration and enhances overall quality.": "Kendi modelinizi eğitirken önceden eğitilmiş modelleri kullanın. Bu yaklaşım, eğitim süresini azaltır ve genel kaliteyi artırır.",
+ "Extract Features": "Özellikleri Çıkar",
+ "Overtraining Detector": "Aşırı Eğitim Dedektörü",
+ "Detect overtraining to prevent the model from learning the training data too well and losing the ability to generalize to new data.": "Modelin eğitim verilerini çok iyi öğrenmesini ve yeni verilere genelleme yeteneğini kaybetmesini önlemek için aşırı eğitimi tespit edin.",
+ "Overtraining Detector Settings": "Aşırı Eğitim Dedektörü Ayarları",
+ "Overtraining Threshold": "Aşırı Eğitim Eşiği",
+ "Set the maximum number of epochs you want your model to stop training if no improvement is detected.": "Herhangi bir iyileşme tespit edilmezse modelinizin eğitimini durdurmasını istediğiniz maksimum epoch sayısını ayarlayın.",
+
+ "Start Training": "Eğitmeye Başla",
+ "Stop Training & Restart Applio": "Eğitimi Durdur ve Applio'yu Yeniden Başlat",
+ "Generate Index": "Dizin Oluştur",
+
+ "Export Model": "Modeli Dışa Aktar",
+ "The button 'Upload' is only for google colab: Uploads the exported files to the ApplioExported folder in your Google Drive.": "'Yükle' düğmesi yalnızca google colab içindir: Dışa aktarılan dosyaları Google Drive'ınızdaki ApplioExported klasörüne yükler.",
+ "Exported Pth file": "Dışa Aktarılan Pth dosyası",
+ "Exported Index file": "Dışa Aktarılan Dizin dosyası",
+ "Select the pth file to be exported": "Dışa aktarılacak pth dosyasını seçin",
+ "Select the index file to be exported": "Dışa aktarılacak dizin dosyasını seçin",
+ "Upload": "Yükle",
+
+ "Voice Model": "Ses Modeli",
+ "Select the voice model to use for the conversion.": "Dönüştürme için kullanılacak ses modelini seçin.",
+ "Index File": "Dizin Dosyası",
+ "Select the index file to use for the conversion.": "Dönüştürme için kullanılacak dizin dosyasını seçin.",
+ "Refresh": "Yenile",
+ "Unload Voice": "Sesi Kaldır",
+
+ "Single": "Tek",
+ "Upload Audio": "Ses Yükle",
+ "Select Audio": "Ses Seç",
+ "Select the audio to convert.": "Dönüştürülecek sesi seçin.",
+ "Advanced Settings": "Gelişmiş Ayarlar",
+ "Clear Outputs (Deletes all audios in assets/audios)": "Çıktıları Temizle (assets/audios klasöründeki tüm sesleri siler)",
+ "Custom Output Path": "Özel Çıktı Yolu",
+ "Output Path": "Çıktı Yolu",
+ "The path where the output audio will be saved, by default in assets/audios/output.wav": "Varsayılan olarak assets/audios/output.wav'da kaydedilecek çıktı sesinin yolu",
+ "Split Audio": "Sesi Böl",
+ "Split the audio into chunks for inference to obtain better results in some cases.": "Daha iyi sonuçlar elde etmek için bazı durumlarda çıkarım için sesi parçalara bölün.",
+ "Autotune": "Autotune",
+ "Apply a soft autotune to your inferences, recommended for singing conversions.": "Çıkarımlarınıza yumuşak bir autotune uygulayın, şarkı dönüştürmeleri için önerilir.",
+ "Clean Audio": "Sesi Temizle",
+ "Clean your audio output using noise detection algorithms, recommended for speaking audios.": "Gürültü algılama algoritmaları kullanarak ses çıktınızı temizleyin, konuşma sesleri için önerilir.",
+ "Clean Strength": "Temiz Gücü",
+ "Set the clean-up level to the audio you want, the more you increase it the more it will clean up, but it is possible that the audio will be more compressed.": "İstediğiniz sese temizleme seviyesini ayarlayın, ne kadar artırırsanız o kadar temizler, ancak sesin daha fazla sıkıştırılması mümkündür.",
+ "Pitch": "Perde",
+ "Set the pitch of the audio, the higher the value, the higher the pitch.": "Sesin perdesini ayarlayın, değer ne kadar yüksek olursa perde o kadar yüksek olur.",
+ "Filter Radius": "Filtre Yarıçapı",
+ "If the number is greater than or equal to three, employing median filtering on the collected tone results has the potential to decrease respiration.": "Sayı üçten büyük veya ona eşitse, toplanan ton sonuçlarına ortanca filtreleme uygulamak solunumu azaltabilir.",
+ "Search Feature Ratio": "Arama Özelliği Oranı",
+ "Influence exerted by the index file; a higher value corresponds to greater influence. However, opting for lower values can help mitigate artifacts present in the audio.": "Dizin dosyası tarafından uygulanan etki; daha yüksek bir değer daha büyük etkiye karşılık gelir. Ancak daha düşük değerler seçmek, sesteki var olan eserlerin azaltılmasına yardımcı olabilir.",
+ "Volume Envelope": "Ses Zarfı",
+ "Substitute or blend with the volume envelope of the output. The closer the ratio is to 1, the more the output envelope is employed.": "Çıktının ses zarfı ile değiştirme veya karıştırma yapın. Oran 1'e ne kadar yakınsa, çıktı zarfı o kadar fazla kullanılır.",
+ "Protect Voiceless Consonants": "Sessiz Ünsüzleri Koru",
+ "Safeguard distinct consonants and breathing sounds to prevent electro-acoustic tearing and other artifacts. Pulling the parameter to its maximum value of 0.5 offers comprehensive protection. However, reducing this value might decrease the extent of protection while potentially mitigating the indexing effect.": "Elektro-akustik yırtılmayı ve diğer eserleri önlemek için farklı ünsüzleri ve solunum seslerini koruyun. Parametreyi maksimum değeri 0,5'e çekmek kapsamlı koruma sağlar. Ancak bu değeri azaltmak, koruma kapsamını azaltırken dizin oluşturma etkisini azaltabilir.",
+ "Pitch extraction algorithm": "Perde çıkarma algoritması",
+ "Pitch extraction algorithm to use for the audio conversion. The default algorithm is rmvpe, which is recommended for most cases.": "Ses dönüşümü için kullanılacak perde çıkarma algoritması. Varsayılan algoritma, çoğu durum için önerilen rmvpedir.",
+
+ "Convert": "Dönüştür",
+ "Export Audio": "Sesi Dışa Aktar",
+
+ "Batch": "Toplu",
+ "Input Folder": "Giriş Klasörü",
+ "Select the folder containing the audios to convert.": "Dönüştürülecek sesleri içeren klasörü seçin.",
+ "Enter input path": "Giriş yolunu girin",
+ "Output Folder": "Çıktı Klasörü",
+ "Select the folder where the output audios will be saved.": "Çıktı seslerinin kaydedileceği klasörü seçin.",
+ "Enter output path": "Çıktı yolunu girin",
+
+ "Get information about the audio": "Ses hakkında bilgi al",
+
+ "## Voice Blender": "## Ses Karıştırıcı",
+ "Select two voice models, set your desired blend percentage, and blend them into an entirely new voice.": "İki ses modeli seçin, istediğiniz karışım yüzdesini ayarlayın ve bunları tamamen yeni bir sese karıştırın.",
+ "Voice Blender": "Ses Karıştırıcı",
+ "Drag and drop your model here": "Modelinizi buraya sürükleyip bırakın",
+ "You can also use a custom path.": "Özel bir yol da kullanabilirsiniz.",
+ "Blend Ratio": "Karışım Oranı",
+ "Adjusting the position more towards one side or the other will make the model more similar to the first or second.": "Konumu bir yöne veya diğerine daha fazla ayarlayarak modeli birinci veya ikinciye daha benzer hale getireceksiniz.",
+ "Fusion": "Birleştirme",
+
+ "Path to Model": "Modele Giden Yol",
+ "Enter path to model": "Modele giden yolu girin",
+ "Model information to be placed": "Yerleştirilecek model bilgileri",
+ "Inroduce the model information": "Model bilgilerini tanıtın",
+ "The information to be placed in the model (You can leave it blank or put anything).": "Modele yerleştirilecek bilgiler (Boş bırakabilir veya bir şey koyabilirsiniz).",
+ "View model information": "Model bilgilerini görüntüleyin",
+ "Introduce the model pth path": "Model pth yolunu tanıtın",
+ "View": "Görüntüle",
+ "Model extraction": "Model çıkarımı",
+ "Model conversion": "Model dönüştürme",
+ "Pth file": "Pth dosyası",
+ "Output of the pth file": "Pth dosyasının çıktısı",
+
+ "# How to Report an Issue on GitHub": "# GitHub'da Bir Sorunu Nasıl Bildirirsiniz",
+ "1. Click on the 'Record Screen' button below to start recording the issue you are experiencing.": "1. Yaşadığınız sorunu kaydetmeye başlamak için aşağıdaki 'Ekran Kaydı' düğmesine tıklayın.",
+ "2. Once you have finished recording the issue, click on the 'Stop Recording' button (the same button, but the label changes depending on whether you are actively recording or not).": "2. Sorunu kaydetmeyi tamamladığınızda, 'Kaydı Durdur' düğmesine tıklayın (aynı düğme, ancak aktif olarak kayıt yapıp yapmadığınıza bağlı olarak etiket değişir).",
+ "3. Go to [GitHub Issues](https://github.com/IAHispano/Applio/issues) and click on the 'New Issue' button.": "3. [GitHub Sorunlarına](https://github.com/IAHispano/Applio/issues) gidin ve 'Yeni Sorun' düğmesine tıklayın.",
+ "4. Complete the provided issue template, ensuring to include details as needed, and utilize the assets section to upload the recorded file from the previous step.": "4. Sağlanan sorun şablonunu tamamlayın, gerektiği gibi ayrıntıları eklediğinizden emin olun ve önceki adımdan kaydedilen dosyayı yüklemek için varlıklar bölümünü kullanın.",
+
+ "Record Screen": "Ekranı Kaydet",
+ "Record": "Kayıt Yap",
+ "Stop Recording": "Kaydı Durdur",
+
+ "Introduce the model .pth path": "Model .pth yolunu tanıtın",
+ "See Model Information": "Model Bilgilerini Görün",
+
+ "## Download Model": "## Model İndirin",
+ "Model Link": "Model Bağlantısı",
+ "Introduce the model link": "Model bağlantısını tanıtın",
+ "Download Model": "Modeli İndirin",
+ "## Drop files": "## Dosyaları bırakın",
+ "Drag your .pth file and .index file into this space. Drag one and then the other.": ".pth dosyanızı ve .index dosyanızı bu alana sürükleyin. Önce birini sonra diğerini sürükleyin.",
+ "## Search Model": "## Modeli Ara",
+ "Search": "Ara",
+ "Introduce the model name to search.": "Aramak için model adını girin.",
+ "We couldn't find models by that name.": "Bu isme sahip bir model bulunamadı.",
+
+ "TTS Voices": "TTS Sesleri",
+ "Select the TTS voice to use for the conversion.": "Dönüşüm için kullanılacak TTS sesini seçin.",
+ "Text to Synthesize": "Sentezlemek İçin Metin",
+ "Enter the text to synthesize.": "Sentezlemek için metni girin.",
+ "Or you can upload a .txt file": "Veya bir .txt dosyası yükleyebilirsiniz",
+ "Enter text to synthesize": "Sentezlemek için metin girin",
+ "Output Path for TTS Audio": "TTS Sesinin Çıktı Yolu",
+ "Output Path for RVC Audio": "RVC Sesinin Çıktı Yolu",
+
+ "Enable Applio integration with Discord presence": "Discord durumu ile Applio entegrasyonunu etkinleştirin",
+ "It will activate the possibility of displaying the current Applio activity in Discord.": "Applio geçerli etkinliğini Discord'da görüntüleme olasılığını etkinleştirir.",
+ "Enable Applio integration with applio.org/models using flask": "Flask kullanarak applio.org/models ile Applio entegrasyonunu etkinleştirin",
+ "It will activate the possibility of downloading models with a click from the website.": "Web sitesinden bir tıklama ile modellerin indirilmesi olasılığını etkinleştirir.",
+ "Enable fake GPU": "Sahte GPU'yu Etkinleştir",
+ "Training is currently unsupported due to the absence of a GPU. To activate the training tab, navigate to the settings tab and enable the 'Fake GPU' option.": "GPU olmadığı için eğitim şu anda desteklenmiyor. Eğitim sekmesini etkinleştirmek için ayarlar sekmesine gidin ve 'Sahte GPU' seçeneğini etkinleştirin.",
+ "Activates the train tab. However, please note that this device lacks GPU capabilities, hence training is not supported. This option is only for testing purposes. (This option will restart Applio)": "Eğitim sekmesini etkinleştirir. Ancak, bu cihazda GPU yetenekleri olmadığından, eğitim desteklenmez. Bu seçenek yalnızca test amaçlıdır. (Bu seçenek Applio'yu yeniden başlatacaktır)",
+ "Theme": "Tema",
+ "Select the theme you want to use. (Requires restarting Applio)": "Kullanmak istediğiniz temayı seçin. (Applio'yu yeniden başlatmayı gerektirir)",
+ "Language": "Dil",
+ "Select the language you want to use. (Requires restarting Applio)": "Kullanmak istediğiniz dili seçin. (Applio'yu yeniden başlatmayı gerektirir)",
+
+ "Plugin Installer": "Plugin Yükleyici",
+ "Drag your plugin.zip to install it": "Yüklemek için plugin.zip'inizi sürükleyin",
+
+ "Version Checker": "Sürüm Denetleyicisi",
+ "Check which version of Applio is the latest to see if you need to update.": "Güncellemeniz gerekip gerekmediğini görmek için Applio'nun hangi sürümünün en son olduğunu kontrol edin.",
+ "Check for updates": "Güncellemeleri kontrol et"
+}
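Because lookups are keyed by the literal English string, a translation only takes effect when its key matches the source string byte for byte, typos included; this is why idiosyncratic keys such as "Inroduce the model information" are reproduced verbatim in every language file. A small consistency check against a reference file might look like the following sketch (the en_US.json path is an assumption for illustration):

```python
import json

# Compare a translation file against the English reference (assumed path).
with open("assets/i18n/languages/en_US.json", encoding="utf-8") as f:
    reference = set(json.load(f))  # set of English source-string keys
with open("assets/i18n/languages/tr_TR.json", encoding="utf-8") as f:
    translated = set(json.load(f))

print("missing keys:", sorted(reference - translated))  # not yet translated
print("stale keys:", sorted(translated - reference))    # mistyped or removed
```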
diff --git a/assets/i18n/languages/uk_UK.json b/assets/i18n/languages/uk_UK.json
new file mode 100644
index 0000000000000000000000000000000000000000..ea2441cfffad635d76330de546830bc184004fb6
--- /dev/null
+++ b/assets/i18n/languages/uk_UK.json
@@ -0,0 +1,231 @@
+{
+ "VITS-based Voice Conversion focused on simplicity, quality and performance.": "Найкращий інструмент для клонування голосу, ретельно оптимізований для неперевершеної потужності, модульності та зручного використання.",
+ "This section contains some extra utilities that often may be in experimental phases.": "Цей розділ містить деякі додаткові утиліти, які часто можуть перебувати в експериментальних стадіях.",
+ "Output Information": "Вихідна інформація",
+ "The output information will be displayed here.": "Тут буде відображена вихідна інформація.",
+
+ "Inference": "Заміна голосу",
+ "Train": "Навчання моделі",
+ "Extra": "Додатково",
+ "Merge Audios": "Об'єднання аудіо",
+ "Processing": "Обробка",
+ "Audio Analyzer": "Аналізатор звуку",
+ "Model Information": "Інформація про модель",
+ "Plugins": "Плагіни",
+ "Download": "Завантажити",
+ "Report a Bug": "Повідомити про помилку",
+ "Settings": "Параметри",
+
+ "Preprocess": "Обробка",
+ "Model Name": "Назва моделі",
+ "Name of the new model.": "Назва нової моделі.",
+ "Enter model name": "Введіть назву моделі",
+ "Dataset Path": "Шлях датасету",
+ "Path to the dataset folder.": "Шлях до папки з аудіо датасетом.",
+ "Refresh Datasets": "Оновити датасети",
+ "Dataset Creator": "Створити датасет",
+ "Dataset Name": "Назва датасету",
+ "Name of the new dataset.": "Назва нового датасету.",
+ "Enter dataset name": "Введіть назву датасету",
+ "Upload Audio Dataset": "Завантажити датасет",
+ "The audio file has been successfully added to the dataset. Please click the preprocess button.": "Аудіофайл успішно додано до датасету. Будь ласка, натисніть кнопку обробки датасету.",
+ "Enter dataset path": "Введіть шлях до датасету",
+ "Sampling Rate": "Частота дискретизації",
+ "The sampling rate of the audio files.": "Частота дискретизації аудіофайлів.",
+ "Model Architecture": "Версія RVC",
+ "Version of the model architecture.": "Версія моделі RVC.",
+ "Preprocess Dataset": "Обробити датасет",
+
+ "Embedder Model": "Модель вбудовування",
+ "Model used for learning speaker embedding.": "Модель, що використовується для навчання вбудовувань мовця.",
+ "Extract": "Видобування",
+ "Hop Length": "Довжина кроку",
+ "Denotes the duration it takes for the system to transition to a significant pitch change. Smaller hop lengths require more time for inference but tend to yield higher pitch accuracy.": "Позначає час, необхідний системі для переходу до значної зміни висоти тону. Менша довжина кроку вимагає більше часу для обробки, але, як правило, забезпечує вищу точність визначення висоти тону.",
+ "Batch Size": "Використання відеопам'яті (ГБ)",
+ "It's advisable to align it with the available VRAM of your GPU. A setting of 4 offers improved accuracy but slower processing, while 8 provides faster and standard results.": "Кількість VRAM яка буде використана в процесі навчання. Бажано встановити значення відповідно до доступної відеопам'яті вашої відеокарти. Значення 4 забезпечує кращу точність, але повільнішу обробку, тоді як 8 забезпечує швидші та стандартні результати.",
+ "Save Every Epoch": "Частота збереження епох",
+ "Determine at how many epochs the model will saved at.": "Визначає інтервал збереження проміжкових епох.",
+ "Total Epoch": "Кількість епох",
+ "Specifies the overall quantity of epochs for the model training process.": "Визначає загальну кількість епох для процесу навчання моделі.",
+ "Pretrained": "Попередньо навчені моделі",
+ "Save Only Latest": "Зберігати лише останні",
+ "Enabling this setting will result in the G and D files saving only their most recent versions, effectively conserving storage space.": "Увімкнення цієї опції призведе до того, що файли G і D (які потрібні для відновлення навчання моделі) будуть збережені лише в останній версії, що ефективно заощаджує місце на диску, але без можливості відновити навчання з попередніх епох.",
+ "Save Every Weights": "Збереження проміжкових моделей",
+ "This setting enables you to save the weights of the model at the conclusion of each epoch.": "Цей параметр дозволяє зберігати проміжкові епохи моделі у встановленому інтервалі.",
+ "Custom Pretrained": "Користувацькі попередньо навчені моделі",
+ "Utilizing custom pretrained models can lead to superior results, as selecting the most suitable pretrained models tailored to the specific use case can significantly enhance performance.": "Використання користувацьких попередньо навчених моделей може призвести до чудових результатів навчання, оскільки вибір найбільш підходящих попередньо навчених моделей, адаптованих до конкретного випадку використання, може значно підвищити продуктивність. (Увімкніть цей параметр для відновлення призупиненого навчання.)",
+ "Upload Pretrained Model": "Завантажте попередньо навчену модель",
+ "Refresh Custom Pretraineds": "Оновити",
+ "Pretrained Custom Settings": "Налаштування попередньо навчених моделей",
+ "The file you dropped is not a valid pretrained file. Please try again.": "Файл, який ви скинули, не є дійсним попередньо навченим файлом. Будь ласка, спробуйте ще раз.",
+ "Click the refresh button to see the pretrained file in the dropdown menu.": "Натисніть кнопку «Оновити», щоб переглянути попередньо підготовлений файл у розкривному меню.",
+ "Pretrained G Path": "Шлях до G файлу",
+ "Pretrained D Path": "Шлях до D файлу",
+ "GPU Settings": "Налаштування GPU",
+ "Sets advanced GPU settings, recommended for users with better GPU architecture.": "Встановлення розширених налаштувань графічного процесора, рекомендовано для користувачів з кількома відеокартами.",
+ "GPU Custom Settings": "Налаштування GPU",
+ "GPU Number": "Індексовані GPU",
+ "0 to ∞ separated by -": "Вкажіть індекс графічних процесорів, які ви хочете використати для навчання, ввівши їх через дефіс (-). Наприклад: 0-1-3 ",
+ "GPU Information": "Список GPU",
+ "Pitch Guidance": "Керування тоном голосу",
+ "By employing pitch guidance, it becomes feasible to mirror the intonation of the original voice, including its pitch. This feature is particularly valuable for singing and other scenarios where preserving the original melody or pitch pattern is essential.": "Використовуючи висоту тону, стає можливим відтворити інтонацію оригінального голосу, включаючи його висоту. Ця функція особливо корисна для співу та інших сценаріїв, де важливо зберегти оригінальну мелодію або висоту тону.",
+ "Utilize pretrained models when training your own. This approach reduces training duration and enhances overall quality.": "Використовуйте попередньо навчені моделі, коли тренуєте свої власні. Такий підхід скорочує тривалість навчання та підвищує загальну якість.",
+ "Extract Features": "Обробити характеристики голосу",
+ "Overtraining Detector": "Детектор перенавченості",
+ "Detect overtraining to prevent the model from learning the training data too well and losing the ability to generalize to new data.": "Виявляє перенавчання моделі, щоб запобігти надмірному засвоєнню навчальної інформації та втраті здатності до узагальнення нових даних.",
+ "Overtraining Detector Settings": "Налаштування детектора перенавченості",
+ "Overtraining Threshold": "Поріг перетренованості",
+ "Set the maximum number of epochs you want your model to stop training if no improvement is detected.": "Встановіть максимальну кількість епох, через яку модель припинить навчання, якщо не буде виявлено ознак прогрессу.",
+ "Sync Graph": "Синхронізація графіків",
+ "Synchronize the graph of the tensorbaord. Only enable this setting if you are training a new model.": "Синхронізувати графіки «tensorbaord» для відображення процесу навчання. Використовуйте цей параметр, тільки якщо ви навчаєте нову модель.",
+
+ "Start Training": "Почати навчання",
+ "Stop Training & Restart Applio": "Зупинити навчання та Перезапустити Applio",
+ "Generate Index": "Згенерувати індекс",
+
+ "Export Model": "Експортувати модель",
+ "The button 'Upload' is only for google colab: Uploads the exported files to the ApplioExported folder in your Google Drive.": "Кнопка «Завантажити» призначена лише для google colab: Завантажує експортовані файли до папки ApplioExported у вашому Google Диску.",
+ "Exported Pth file": "Експортований Pth файл",
+ "Exported Index file": "Експортований Index файл",
+ "Select the pth file to be exported": "Виберіть pth файл для експорту",
+ "Select the index file to be exported": "Виберіть файл індексу для експорту",
+ "Upload": "Завантажити",
+
+ "Voice Model": "Голосова модель",
+ "Select the voice model to use for the conversion.": "Виберіть модель голосу, яку буде використано для перетворення.",
+ "Index File": "Індекс файл",
+ "Select the index file to use for the conversion.": "Виберіть індекс, який буде використано для перетворення.",
+ "Refresh": "Оновити",
+ "Unload Voice": "Скинути голос",
+
+ "Single": "Один файл",
+ "Upload Audio": "Завантажити аудіо",
+ "Select Audio": "Виберіть Аудіо",
+ "Select the audio to convert.": "Виберіть аудіо, яке потрібно конвертувати.",
+ "Advanced Settings": "Розширені налаштування",
+ "Clear Outputs (Deletes all audios in assets/audios)": "Очистити результати (видаляє всі аудіофайли в assets/audios",
+ "Custom Output Path": "Власний вихідний шлях",
+ "Output Path": "Вихідний шлях",
+ "The path where the output audio will be saved, by default in assets/audios/output.wav": "Шлях, куди буде збережено вихідний аудіофайл, за замовчуванням у assets/audios/output.wav",
+ "Split Audio": "Розділити аудіо",
+ "Split the audio into chunks for inference to obtain better results in some cases.": "Розділяє аудіо на фрагменти для виведення - в деяких випадках це сприяє отриманню кращих результатів.",
+ "Autotune": "Автотюн",
+ "Apply a soft autotune to your inferences, recommended for singing conversions.": "Додає до вихідного файлу м'який автотюн, рекомендується для вокальних перетворень.",
+ "Clean Audio": "Очищення аудіо",
+ "Clean your audio output using noise detection algorithms, recommended for speaking audios.": "Очищення вихідного звуку за допомогою алгоритмів виявлення шуму, рекомендується для розмовних перетворень.",
+ "Clean Strength": "Сила очищення",
+ "Upscale Audio": "Покращення аудіо",
+ "Upscale the audio to a higher quality, recommended for low-quality audios. (It could take longer to process the audio)": "Підвищує якість аудіо до більш високої, рекомендується для аудіо низької якості. (Обробка аудіо може зайняти більше часу)",
+ "Set the clean-up level to the audio you want, the more you increase it the more it will clean up, but it is possible that the audio will be more compressed.": "Встановіть рівень очищення відповідно до бажаного результату, чим більше значення, тим більше аудіо буде очищено, при значеннях близьких до 1 можлива поява артефактів.",
+ "Pitch": "Тембр голосу",
+ "Set the pitch of the audio, the higher the value, the higher the pitch.": "Встановлення висоти голосу (кількість півтонів, підвищити на октаву: 12, понизити на октаву: -12). Рекомендована позначка 12 для перетворення чоловічого голосу на жіночий, і -12 для перетворення жіночого голосу на чоловічий. Якщо діапазон звуку занадто далеко і голос спотворюється, ви можете самостійно відрегулювати його до відповідного діапазону.",
+ "Filter Radius": "Радіус фільтра",
+ "If the number is greater than or equal to three, employing median filtering on the collected tone results has the potential to decrease respiration.": "Якщо це число більше або дорівнює трьом, застосування медіанної фільтрації на зібраних результатах тонів, може призвести до зменшення звуків дихання.",
+ "Search Feature Ratio": "Співвідношення функцій пошуку",
+ "Influence exerted by the index file; a higher value corresponds to greater influence. However, opting for lower values can help mitigate artifacts present in the audio.": "Вплив файлу індексації; чим більше значення, тим більший вплив. Однак, вибір менших значень може допомогти зменшити артефакти, присутні в аудіо.",
+ "Volume Envelope": "Динаміка гучності",
+ "Substitute or blend with the volume envelope of the output. The closer the ratio is to 1, the more the output envelope is employed.": "Замінити або змішати з форму вихідного сигналу. Чим ближче співвідношення до 1, тим більше використовується динаміка гучності оригінального аудіофайлу.",
+ "Protect Voiceless Consonants": "Захист глухих приголосних",
+ "Safeguard distinct consonants and breathing sounds to prevent electro-acoustic tearing and other artifacts. Pulling the parameter to its maximum value of 0.5 offers comprehensive protection. However, reducing this value might decrease the extent of protection while potentially mitigating the indexing effect.": "Захист чіткості приголосних і звуків дихання для запобігання електроакустичного розриву та інших артефактів. Збільшення цього параметра до максимального значення 0,5 забезпечує комплексний захист. Однак зменшення цього значення може зменшити ступінь захисту, а також потенційно послабити ефект індексування.",
+ "Pitch extraction algorithm": "Алгоритм виділення тембру",
+ "Pitch extraction algorithm to use for the audio conversion. The default algorithm is rmvpe, which is recommended for most cases.": "Алгоритм виділення висоти тону голосу, який використовуватиметься для перетворення аудіо. ('pm': швидке перетворення, але нижча якість звучання; 'harvest': кращі басові тембри, але дуже повільний). Алгоритм за замовчуванням - rmvpe, який рекомендується для більшості випадків.",
+
+ "Convert": "Перетворити",
+ "Export Audio": "Експорт аудіо",
+
+ "Batch": "Група файлів",
+ "Input Folder": "Вхідна тека",
+ "Select the folder containing the audios to convert.": "Виберіть теку, що містить аудіозаписи для перетворення.",
+ "Enter input path": "Введіть вхідний шлях",
+ "Output Folder": "Вихідна тека",
+ "Select the folder where the output audios will be saved.": "Виберіть теку, куди будуть збережені вихідні аудіозаписи.",
+ "Enter output path": "Введіть шлях для збереження",
+
+ "Get information about the audio": "Отримати інформацію про аудіо",
+
+ "## Voice Blender": "Голосовий мікшер",
+ "Select two voice models, set your desired blend percentage, and blend them into an entirely new voice.": "Виберіть дві моделі голосу, встановіть бажаний відсоток змішування і змішайте їх в абсолютно новий голос.",
+ "Voice Blender": "Голосовий мікшер",
+ "Drag and drop your model here": "Перетягніть модель сюди",
+ "You can also use a custom path.": "Ви також можете використовувати власний шлях.",
+ "Blend Ratio": "Пропорції змішування",
+ "Adjusting the position more towards one side or the other will make the model more similar to the first or second.": "Регулювання положення більше в ту чи іншу сторону зробить модель більш схожою на першу або другу.",
+ "Fusion": "З'єднати",
+ "Path to Model": "Шлях до моделі",
+ "Enter path to model": "Введіть шлях до моделі",
+ "Model information to be placed": "Інформація про модель, яку потрібно розмістити",
+ "Inroduce the model information": "Ознайомлення з інформацією про модель",
+ "The information to be placed in the model (You can leave it blank or put anything).": "Інформація, яку потрібно розмістити в моделі (Ви можете залишити поле порожім або написати що завгодно).",
+ "View model information": "Перегляд інформації про модель",
+ "Introduce the model pth path": "Вкажіть шлях до pth файлу моделі",
+ "View": "Перегляд",
+ "Model extraction": "Видобування моделі",
+ "Model conversion": "Перетворення моделі",
+ "Pth file": "Pth файл",
+ "Output of the pth file": "Вихід pth файлу",
+
+ "# How to Report an Issue on GitHub": "# Як повідомити про проблему на GitHub",
+ "1. Click on the 'Record Screen' button below to start recording the issue you are experiencing.": "1. Натисніть кнопку «Записати екран» нижче, щоб почати запис проблеми, з якою ви зіткнулися.",
+ "2. Once you have finished recording the issue, click on the 'Stop Recording' button (the same button, but the label changes depending on whether you are actively recording or not).": "2. Після того, як ви закінчите запис помилки, натисніть кнопку «Зупинити запис» (та сама кнопка, але мітка змінюється залежно від того, активно ви записуєте чи ні).",
+ "3. Go to [GitHub Issues](https://github.com/IAHispano/Applio/issues) and click on the 'New Issue' button.": "3. Перейдіть до [GitHub Issues](https://github.com/IAHispano/Applio/issues) і натисніть кнопку 'New Issue'.",
+ "4. Complete the provided issue template, ensuring to include details as needed, and utilize the assets section to upload the recorded file from the previous step.": "4. Заповніть наданий шаблон про проблему, обов'язково включивши деталі за потреби, і скористайтеся розділом активів, щоб завантажити записаний файл з попереднього кроку.",
+
+ "Record Screen": "Записати екран",
+ "Record": "Запис",
+ "Stop Recording": "Зупинити запис екрану",
+
+ "Introduce the model .pth path": "Вкажіть шлях до .pth файлу моделі",
+ "See Model Information": "Перегляд інформації про модель",
+
+ "## Download Model": "Завантажити модель",
+ "Model Link": "Посилання на модель",
+ "Introduce the model link": "Введіть посилання на модель",
+ "Download Model": "Завантажити модель",
+ "## Drop files": "Додати модель",
+ "Drag your .pth file and .index file into this space. Drag one and then the other.": "Перетягніть файл .pth і файл .index у цей простір. Перетягніть їх по черзі.",
+ "## Search Model": "Шукати модель",
+ "Search": "Пошук",
+ "Introduce the model name to search.": "Введіть назву моделі.",
+ "We couldn't find models by that name.": "Ми не знайшли ні одної моделі зі схожим ім'ям.",
+ "## Download Pretrained Models": "Завантажити підготовлену модель",
+ "Select the pretrained model you want to download.": "Виберіть попередньо навчену модель, яку ви хочете завантажити.",
+ "And select the sampling rate": "І виберіть частоту дискретизації.",
+
+ "TTS Voices": "Голоси TTS",
+ "TTS Speed": "Швидкість TTS",
+ "Increase or decrease TTS speed.": "Збільшити або зменшити швидкість синтезу мови.",
+ "Select the TTS voice to use for the conversion.": "Виберіть голос синтезу мови, який буде використано для перетворення.",
+ "Text to Synthesize": "Текст для синтезу",
+ "Enter the text to synthesize.": "Введіть текст для синтезу.",
+ "Or you can upload a .txt file": "Або ви можете завантажити файл .txt",
+ "Enter text to synthesize": "Введіть текст для синтезу",
+ "Output Path for TTS Audio": "Вихідний шлях для TTS аудіо файлу",
+ "Output Path for RVC Audio": "Вихідний шлях для RVC аудіо файлу",
+
+ "Enable Applio integration with Discord presence": "Увімкнути Applio інтеграцію з Discord активністю",
+ "It will activate the possibility of displaying the current Applio activity in Discord.": "Це активує можливість відображати поточну активність Applio в Discord.",
+ "Enable Applio integration with applio.org/models using flask": "Увімкнути інтеграцію Applio з applio.org/models за допомогою Flaskk",
+ "It will activate the possibility of downloading models with a click from the website.": "Це активує можливість завантаження моделей одним кліком з веб-сайту.",
+ "Enable fake GPU": "Увімкнути фальшивий GPU",
+ "Training is currently unsupported due to the absence of a GPU. To activate the training tab, navigate to the settings tab and enable the 'Fake GPU' option.": "Наразі навчання не доступне через відсутність графічного процесора. Щоб увімкнути тренування, перейдіть на вкладку налаштувань і увімкніть опцію «Фальшивий GPU».",
+ "Activates the train tab. However, please note that this device lacks GPU capabilities, hence training is not supported. This option is only for testing purposes. (This option will restart Applio)": "Активує вкладку тренування. Однак зауважте, що цей пристрій не має графічного процесора, тому тренування не підтримується. Ця опція призначена лише для тестування. (Ця опція перезапустить Applio)",
+ "Theme": "Теми",
+ "Select the theme you want to use. (Requires restarting Applio)": "Виберіть тему, яку хочете використовувати. (Потрібно перезапустити Applio)",
+ "Language": "Мова",
+ "Select the language you want to use. (Requires restarting Applio)": "Виберіть мову, яку ви хочете використовувати. (Потрібно перезапустити Applio)",
+
+ "Plugin Installer": "Інсталятор плагінів",
+ "Drag your plugin.zip to install it": "Перетягніть plugin.zip, щоб встановити його",
+
+ "Version Checker": "Перевірка Оновлень",
+ "Check which version of Applio is the latest to see if you need to update.": "Перевірте, яка версія Applio найновіша, щоб дізнатися, чи потрібно вам оновлюватися.",
+ "Check for updates": "Перевірити наявність оновлень",
+
+ "Export Format": "Формат експорту",
+ "Select the format to export the audio.": "Виберіть формат експорту аудіо.",
+
+ "Specify the number of GPUs you wish to utilize for training by entering them separated by hyphens (-).": "Вкажіть індекс графічних процесорів, які ви хочете використати для навчання, ввівши їх через дефіс (-). Наприклад: 0-1-3 ",
+ "The GPU information will be displayed here.": "Тут вказана інформація про графічні процесори (у форматі [Індекс:Інформація]).",
+ "Restart Applio": "Перезапустити Applio"
+
+}
diff --git a/assets/i18n/languages/ur_UR.json b/assets/i18n/languages/ur_UR.json
new file mode 100644
index 0000000000000000000000000000000000000000..1b5e37c1a3abed8b81477d886c9d5a3767b9f4d1
--- /dev/null
+++ b/assets/i18n/languages/ur_UR.json
@@ -0,0 +1,175 @@
+{
+ "VITS-based Voice Conversion focused on simplicity, quality and performance.": "حتمی صوتی کلوننگ ٹول ، بے مثال طاقت ، ماڈیولریٹی ، اور صارف دوست تجربے کے لئے احتیاط سے بہتر بنایا گیا ہے۔",
+ "This section contains some extra utilities that often may be in experimental phases.": "اس سیکشن میں کچھ اضافی افادیت شامل ہیں جو اکثر تجرباتی مراحل میں ہوسکتی ہیں۔",
+ "Output Information": "آؤٹ پٹ معلومات",
+ "The output information will be displayed here.": "آؤٹ پٹ کی معلومات یہاں ظاہر کی جائے گی۔",
+ "Inference": "استدلال",
+ "Train": "ٹرین",
+ "Extra": "اضافی",
+ "Merge Audios": "آڈیو کو ضم کریں",
+ "Processing": "پروسیسنگ",
+ "Audio Analyzer": "Audio Analyzer",
+ "Model Information": "ماڈل کی معلومات",
+ "Plugins": "پلگ ان",
+ "Download": "ڈاؤن لوڈ",
+ "Report a Bug": "ایک بگ کی رپورٹ کریں",
+ "Settings": "سیٹنگیں",
+ "Preprocess": "پری پروسیس",
+ "Model Name": "ماڈل کا نام",
+ "Name of the new model.": "نئے ماڈل کا نام",
+ "Enter model name": "ماڈل کا نام درج کریں",
+ "Dataset Path": "ڈیٹا سیٹ کا راستہ",
+ "Path to the dataset folder.": "ڈیٹا سیٹ فولڈر کا راستہ۔",
+ "Refresh Datasets": "تازہ ترین ڈیٹا سیٹ",
+ "Dataset Creator": "ڈیٹا سیٹ تخلیق کار",
+ "Dataset Name": "ڈیٹا سیٹ کا نام",
+ "Name of the new dataset.": "نئے ڈیٹا سیٹ کا نام۔",
+ "Enter dataset name": "ڈیٹا سیٹ کا نام درج کریں",
+ "Upload Audio Dataset": "آڈیو ڈیٹا سیٹ اپ لوڈ کریں",
+ "The audio file has been successfully added to the dataset. Please click the preprocess button.": "آڈیو فائل کو کامیابی سے ڈیٹا سیٹ میں شامل کیا گیا ہے۔ براہ کرم پری پروسیس بٹن پر کلک کریں۔",
+ "Enter dataset path": "ڈیٹا سیٹ کا راستہ درج کریں",
+ "Sampling Rate": "نمونے لینے کی شرح",
+ "The sampling rate of the audio files.": "آڈیو فائلوں کے نمونے لینے کی شرح۔",
+ "Model Architecture": "Model Architecture",
+ "Version of the model architecture.": "ماڈل کا آر وی سی ورژن۔",
+ "Preprocess Dataset": "پری پروسیس ڈیٹا سیٹ",
+ "Extract": "نکالنا",
+ "Hop Length": "ہاپ کی لمبائی",
+ "Denotes the duration it takes for the system to transition to a significant pitch change. Smaller hop lengths require more time for inference but tend to yield higher pitch accuracy.": "نظام کو ایک اہم پچ تبدیلی میں منتقل ہونے میں لگنے والے عرصے کی نشاندہی کرتا ہے۔ چھوٹی ہاپ لمبائی کو اندازہ لگانے کے لئے زیادہ وقت کی ضرورت ہوتی ہے لیکن پچ کی درستگی زیادہ ہوتی ہے۔",
+ "Batch Size": "Batch کا سائز",
+ "It's advisable to align it with the available VRAM of your GPU. A setting of 4 offers improved accuracy but slower processing, while 8 provides faster and standard results.": "اسے اپنے جی پی یو کے دستیاب وی آر اے ایم کے ساتھ ہم آہنگ کرنے کا مشورہ دیا جاتا ہے۔ 4 کی ترتیب بہتر درستگی لیکن سست پروسیسنگ پیش کرتی ہے ، جبکہ 8 تیز اور معیاری نتائج فراہم کرتی ہے۔",
+ "Save Every Epoch": "ہر دور کو محفوظ کریں",
+ "Determine at how many epochs the model will saved at.": "اس بات کا تعین کریں کہ ماڈل کتنے ادوار میں محفوظ رہے گا۔",
+ "Total Epoch": "مجموعی دور کی نوعیت",
+ "Specifies the overall quantity of epochs for the model training process.": "ماڈل ٹریننگ کے عمل کے لئے ادوار کی مجموعی مقدار کی وضاحت کرتا ہے۔",
+ "Pretrained": "پہلے سے تربیت یافتہ",
+ "Save Only Latest": "صرف تازہ ترین محفوظ کریں",
+ "Enabling this setting will result in the G and D files saving only their most recent versions, effectively conserving storage space.": "اس ترتیب کو فعال کرنے کے نتیجے میں جی اور ڈی فائلیں صرف اپنے تازہ ترین ورژن کو محفوظ کریں گی ، مؤثر طریقے سے اسٹوریج کی جگہ کو محفوظ کریں گی۔",
+ "Save Every Weights": "ہر وزن کو بچائیں",
+ "This setting enables you to save the weights of the model at the conclusion of each epoch.": "یہ ترتیب آپ کو ہر دور کے اختتام پر ماڈل کے وزن کو بچانے کے قابل بناتی ہے۔",
+ "Custom Pretrained": "اپنی مرضی کے مطابق پہلے سے تربیت یافتہ",
+ "Utilizing custom pretrained models can lead to superior results, as selecting the most suitable pretrained models tailored to the specific use case can significantly enhance performance.": "اپنی مرضی کے مطابق پہلے سے تربیت یافتہ ماڈلز کا استعمال بہتر نتائج کا باعث بن سکتا ہے ، کیونکہ مخصوص استعمال کے معاملے کے مطابق سب سے مناسب پری ٹریننگ ماڈلز کا انتخاب کارکردگی کو نمایاں طور پر بڑھا سکتا ہے۔",
+ "Upload Pretrained Model": "پہلے سے تربیت یافتہ ماڈل اپ لوڈ کریں",
+ "Refresh Custom Pretraineds": "اپنی مرضی کے مطابق پیشگی تربیت یافتہ تازہ کاری کریں",
+ "Pretrained Custom Settings": "پہلے سے تربیت یافتہ کسٹم ترتیبات",
+ "The file you dropped is not a valid pretrained file. Please try again.": "آپ نے جو فائل چھوڑی ہے وہ درست پہلے سے تربیت یافتہ فائل نہیں ہے۔ براہ مہربانی دوبارہ کوشش کریں۔",
+ "Click the refresh button to see the pretrained file in the dropdown menu.": "ڈراپ ڈاؤن مینو میں پہلے سے تربیت یافتہ فائل دیکھنے کے لئے ریفریش بٹن پر کلک کریں۔",
+ "Pretrained G Path": "اپنی مرضی کے مطابق پہلے سے تربیت یافتہ G",
+ "Pretrained D Path": "اپنی مرضی کے مطابق پہلے سے تربیت یافتہ D",
+ "GPU Settings": "GPU Settings",
+ "Sets advanced GPU settings, recommended for users with better GPU architecture.": "بہتر جی پی یو آرکیٹیکچر والے صارفین کے لئے تجویز کردہ جدید جی پی یو سیٹنگز سیٹ کرتا ہے۔",
+ "GPU Custom Settings": "GPU اپنی مرضی کے مطابق ترتیبات",
+ "GPU Number": "GPU نمبر",
+ "0 to ∞ separated by -": "0 سے الگ ∞ -",
+ "GPU Information": "GPU Information",
+ "Pitch Guidance": "پچ گائیڈنس",
+ "By employing pitch guidance, it becomes feasible to mirror the intonation of the original voice, including its pitch. This feature is particularly valuable for singing and other scenarios where preserving the original melody or pitch pattern is essential.": "پچ گائیڈنس کا استعمال کرتے ہوئے ، اس کی پچ سمیت اصل آواز کے آئینے کو آئینہ دار بنانا ممکن ہوجاتا ہے۔ یہ خصوصیت خاص طور پر گانے اور دیگر منظرناموں کے لئے قابل قدر ہے جہاں اصل دھن یا پچ پیٹرن کو محفوظ کرنا ضروری ہے۔",
+ "Utilize pretrained models when training your own. This approach reduces training duration and enhances overall quality.": "اپنی تربیت کرتے وقت پہلے سے تربیت یافتہ ماڈلز کا استعمال کریں۔ یہ نقطہ نظر تربیت کے دورانیے کو کم کرتا ہے اور مجموعی معیار کو بڑھاتا ہے.",
+ "Extract Features": "نکالنے کی خصوصیات",
+ "Start Training": "تربیت شروع کریں",
+ "Generate Index": "انڈیکس پیدا کریں",
+ "Voice Model": "صوتی ماڈل",
+ "Select the voice model to use for the conversion.": "تبادلے کے لئے استعمال کرنے کے لئے صوتی ماڈل منتخب کریں۔",
+ "Index File": "انڈیکس فائل",
+ "Select the index file to use for the conversion.": "تبادلے کے لئے استعمال کرنے کے لئے انڈیکس فائل منتخب کریں۔",
+ "Refresh": "تازہ",
+ "Unload Voice": "آواز کو ان لوڈ کریں",
+ "Single": "تنہا",
+ "Upload Audio": "آڈیو اپ لوڈ کریں",
+ "Select Audio": "آڈیو منتخب کریں",
+ "Select the audio to convert.": "تبدیل کرنے کے لئے آڈیو منتخب کریں۔",
+ "Advanced Settings": "اعلی درجے کی ترتیبات",
+ "Clear Outputs (Deletes all audios in assets/audios)": "آؤٹ پٹ صاف کریں (اثاثوں / آڈیو میں تمام آڈیو حذف کرتا ہے)",
+ "Custom Output Path": "اپنی مرضی کے مطابق آؤٹ پٹ پتھ",
+ "Output Path": "آؤٹ پٹ پتھ",
+ "The path where the output audio will be saved, by default in assets/audios/output.wav": "وہ راستہ جہاں آؤٹ پٹ آڈیو محفوظ کیا جائے گا ، اثاثوں / آڈیو / output.wav میں ڈیفالٹ طور پر",
+ "Split Audio": "آڈیو کو تقسیم کریں",
+ "Split the audio into chunks for inference to obtain better results in some cases.": "کچھ معاملات میں بہتر نتائج حاصل کرنے کے لئے آڈیو کو ٹکڑوں میں تقسیم کریں۔",
+ "Autotune": "Autotune",
+ "Apply a soft autotune to your inferences, recommended for singing conversions.": "گانے کی تبدیلیوں کے لئے تجویز کردہ اپنے تخمینوں پر نرم آٹو ٹیون لگائیں۔",
+ "Clean Audio": "صاف آڈیو",
+ "Clean your audio output using noise detection algorithms, recommended for speaking audios.": "آواز کا پتہ لگانے والے الگورتھم کا استعمال کرتے ہوئے اپنے آڈیو آؤٹ پٹ کو صاف کریں ، جو آڈیو بولنے کے لئے سفارش کی جاتی ہے۔",
+ "Clean Strength": "صاف طاقت",
+ "Set the clean-up level to the audio you want, the more you increase it the more it will clean up, but it is possible that the audio will be more compressed.": "آپ جس آڈیو کو چاہتے ہیں اس پر کلین اپ لیول سیٹ کریں ، جتنا زیادہ آپ اسے بڑھائیں گے اتنا ہی یہ صاف ہوجائے گا ، لیکن یہ ممکن ہے کہ آڈیو زیادہ کمپریسڈ ہوجائے۔",
+ "Pitch": "پچ",
+ "Set the pitch of the audio, the higher the value, the higher the pitch.": "آڈیو کی پچ سیٹ کریں ، قیمت جتنی زیادہ ہوگی ، پچ اتنی ہی زیادہ ہوگی۔",
+ "Filter Radius": "فلٹر کے دائرے",
+ "If the number is greater than or equal to three, employing median filtering on the collected tone results has the potential to decrease respiration.": "اگر تعداد تین سے زیادہ یا اس کے برابر ہے تو ، جمع کردہ ٹون کے نتائج پر اوسط فلٹرنگ کا استعمال سانس کو کم کرنے کی صلاحیت رکھتا ہے۔",
+ "Search Feature Ratio": "تلاش کی خصوصیت کا تناسب",
+ "Influence exerted by the index file; a higher value corresponds to greater influence. However, opting for lower values can help mitigate artifacts present in the audio.": "انڈیکس فائل کے ذریعہ اثر و رسوخ؛ ایک اعلی قیمت زیادہ اثر و رسوخ سے مطابقت رکھتی ہے. تاہم ، کم اقدار کا انتخاب آڈیو میں موجود نوادرات کو کم کرنے میں مدد کرسکتا ہے۔",
+ "Volume Envelope": "حجم کا لفافہ",
+ "Substitute or blend with the volume envelope of the output. The closer the ratio is to 1, the more the output envelope is employed.": "آؤٹ پٹ کے حجم لفافے کے ساتھ متبادل یا مرکب کریں۔ تناسب 1 کے جتنا قریب ہوتا ہے ، اتنا ہی زیادہ آؤٹ پٹ لفافہ استعمال ہوتا ہے۔",
+ "Protect Voiceless Consonants": "آواز کے بغیر عبارتوں کی حفاظت کریں",
+ "Safeguard distinct consonants and breathing sounds to prevent electro-acoustic tearing and other artifacts. Pulling the parameter to its maximum value of 0.5 offers comprehensive protection. However, reducing this value might decrease the extent of protection while potentially mitigating the indexing effect.": "الیکٹرو-صوتی پھٹنے اور دیگر نوادرات کو روکنے کے لئے مختلف عبارتوں اور سانس لینے کی آوازوں کی حفاظت کریں۔ پیرامیٹر کو اس کی زیادہ سے زیادہ قیمت 0.5 تک کھینچنا جامع تحفظ فراہم کرتا ہے۔ تاہم ، اس قدر کو کم کرنے سے تحفظ کی حد کم ہوسکتی ہے جبکہ ممکنہ طور پر انڈیکسنگ اثر کو کم کیا جاسکتا ہے۔",
+ "Pitch extraction algorithm": "پچ نکالنے کا الگورتھم",
+ "Pitch extraction algorithm to use for the audio conversion. The default algorithm is rmvpe, which is recommended for most cases.": "آڈیو تبدیلی کے لئے استعمال کرنے کے لئے پچ نکالنے کا الگورتھم۔ ڈیفالٹ الگورتھم آر ایم وی پی ای ہے ، جو زیادہ تر معاملات کے لئے سفارش کی جاتی ہے۔",
+ "Convert": "بدلیں",
+ "Export Audio": "آڈیو برآمد کریں",
+ "Batch": "بیچ",
+ "Input Folder": "ان پٹ فولڈر",
+ "Select the folder containing the audios to convert.": "تبدیل کرنے کے لئے آڈیو پر مشتمل فولڈر منتخب کریں۔",
+ "Enter input path": "ان پٹ راستہ درج کریں",
+ "Output Folder": "آؤٹ پٹ فولڈر",
+ "Select the folder where the output audios will be saved.": "وہ فولڈر منتخب کریں جہاں آؤٹ پٹ آڈیو محفوظ کیے جائیں گے۔",
+ "Enter output path": "آؤٹ پٹ کا راستہ درج کریں",
+ "Get information about the audio": "آڈیو کے بارے میں معلومات حاصل کریں",
+ "Information about the audio file": "آڈیو فائل کے بارے میں معلومات",
+ "Waiting for information...": "معلومات کا انتظار ہے...",
+ "## Voice Blender": "## وائس بلینڈر",
+ "Select two voice models, set your desired blend percentage, and blend them into an entirely new voice.": "دو صوتی ماڈل منتخب کریں ، اپنا مطلوبہ مرکب فیصد مقرر کریں ، اور انہیں مکمل طور پر نئی آواز میں بلینڈ کریں۔",
+ "Voice Blender": "Voice Blender",
+ "Drag and drop your model here": "اپنے ماڈل کو یہاں گھسیٹیں اور چھوڑیں",
+ "You can also use a custom path.": "آپ اپنی مرضی کے مطابق راستہ بھی استعمال کرسکتے ہیں۔",
+ "Blend Ratio": "مرکب تناسب",
+ "Adjusting the position more towards one side or the other will make the model more similar to the first or second.": "پوزیشن کو ایک طرف یا دوسری طرف زیادہ ایڈجسٹ کرنے سے ماڈل پہلے یا دوسرے سے زیادہ مماثلت رکھتا ہے۔",
+ "Fusion": "فیوژن",
+ "Path to Model": "ماڈل کا راستہ",
+ "Enter path to model": "ماڈل کا راستہ درج کریں",
+ "Model information to be placed": "ماڈل کی معلومات رکھی جائے گی",
+ "Inroduce the model information": "ماڈل کی معلومات فراہم کریں",
+ "The information to be placed in the model (You can leave it blank or put anything).": "ماڈل میں رکھی جانے والی معلومات (آپ اسے خالی چھوڑ سکتے ہیں یا کچھ بھی ڈال سکتے ہیں).",
+ "View model information": "ماڈل کی معلومات دیکھیں",
+ "Introduce the model pth path": "ماڈل پی ٹی ایچ راستہ متعارف کروائیں",
+ "View": "منظر",
+ "Model extraction": "ماڈل نکالنے",
+ "Model conversion": "ماڈل کی تبدیلی",
+ "Pth file": "پی ٹی ایچ فائل",
+ "Output of the pth file": "پی ٹی ایچ فائل کی آؤٹ پٹ",
+ "# How to Report an Issue on GitHub": "# گیٹ ہب پر کسی مسئلے کی اطلاع کیسے دیں",
+ "1. Click on the 'Record Screen' button below to start recording the issue you are experiencing.": "1. آپ جس مسئلے کا سامنا کر رہے ہیں اسے ریکارڈ کرنا شروع کرنے کے لئے نیچے 'ریکارڈ اسکرین' بٹن پر کلک کریں۔",
+ "2. Once you have finished recording the issue, click on the 'Stop Recording' button (the same button, but the label changes depending on whether you are actively recording or not).": "2. ایک بار جب آپ مسئلے کی ریکارڈنگ مکمل کرلیں تو ، 'اسٹاپ ریکارڈنگ' بٹن پر کلک کریں (وہی بٹن ، لیکن لیبل اس بات پر منحصر ہے کہ آپ فعال طور پر ریکارڈنگ کر رہے ہیں یا نہیں)۔",
+ "3. Go to [GitHub Issues](https://github.com/IAHispano/Applio/issues) and click on the 'New Issue' button.": "3. [گیٹ ہب ایشوز] (https://github.com/IAHispano/Applio/issues) پر جائیں اور 'نیا مسئلہ' بٹن پر کلک کریں۔",
+ "4. Complete the provided issue template, ensuring to include details as needed, and utilize the assets section to upload the recorded file from the previous step.": "4. فراہم کردہ ایشو ٹیمپلیٹ کو مکمل کریں ، ضرورت کے مطابق تفصیلات شامل کرنے کو یقینی بنائیں ، اور پچھلے مرحلے سے ریکارڈ شدہ فائل کو اپ لوڈ کرنے کے لئے اثاثوں کے سیکشن کا استعمال کریں۔",
+ "Record Screen": "ریکارڈ اسکرین",
+ "Record": "ریکارڈ",
+ "Stop Recording": "ریکارڈنگ بند کریں",
+ "Introduce the model .pth path": "ماڈل .pth پتھ متعارف کروائیں",
+ "See Model Information": "ماڈل کی معلومات دیکھیں",
+ "## Download Model": "## ڈاؤن لوڈ ماڈل",
+ "Model Link": "ماڈل لنک",
+ "Introduce the model link": "ماڈل کا لنک متعارف کروائیں",
+ "Download Model": "ڈاؤن لوڈ ماڈل",
+ "## Drop files": "## فائلیں چھوڑ دیں",
+ "Drag your .pth file and .index file into this space. Drag one and then the other.": "اپنی .pth فائل اور .انڈیکس فائل کو اس جگہ میں گھسیٹیں۔ ایک کو گھسیٹیں اور پھر دوسرے کو۔",
+ "TTS Voices": "ٹی ٹی ایس وائسز",
+ "Select the TTS voice to use for the conversion.": "تبدیلی کے لئے استعمال کرنے کے لئے TTS آواز منتخب کریں۔",
+ "Text to Synthesize": "ترکیب کرنے کے لئے متن",
+ "Enter the text to synthesize.": "ترکیب کرنے کے لئے متن درج کریں۔",
+ "Or you can upload a .txt file": "یا آپ .txt فائل اپ لوڈ کرسکتے ہیں",
+ "Enter text to synthesize": "ترتیب دینے کے لئے متن درج کریں",
+ "Output Path for TTS Audio": "ٹی ٹی ایس آڈیو کے لئے آؤٹ پٹ پتھ",
+ "Output Path for RVC Audio": "آر وی سی آڈیو کے لئے آؤٹ پٹ پتھ",
+ "Enable Applio integration with Discord presence": "ڈسکارڈ کی موجودگی کے ساتھ ایپلیو انضمام کو فعال کریں",
+ "It will activate the possibility of displaying the current Applio activity in Discord.": "یہ ڈسکارڈ میں موجودہ ایپلیو سرگرمی کو ظاہر کرنے کے امکان کو فعال کرے گا۔",
+ "Enable Applio integration with applio.org/models using flask": "فلاسک کا استعمال کرتے ہوئے applio.org/models کے ساتھ ایپلیو انضمام کو فعال کریں",
+ "It will activate the possibility of downloading models with a click from the website.": "یہ ویب سائٹ سے ایک کلک کے ساتھ ماڈل ڈاؤن لوڈ کرنے کے امکان کو فعال کرے گا۔",
+ "Theme": "موضوع",
+ "Select the theme you want to use. (Requires restarting Applio)": "وہ تھیم منتخب کریں جسے آپ استعمال کرنا چاہتے ہیں۔ (ایپلیو کو دوبارہ شروع کرنے کی ضرورت ہے)",
+ "Language": "زبان",
+ "Select the language you want to use. (Requires restarting Applio)": "وہ زبان منتخب کریں جسے آپ استعمال کرنا چاہتے ہیں۔ (ایپلیو کو دوبارہ شروع کرنے کی ضرورت ہے)",
+ "Plugin Installer": "Plugin Installer",
+ "Drag your plugin.zip to install it": "اسے انسٹال کرنے کے لئے اپنے plugin.zip کو گھسیٹیں",
+ "Version Checker": "Version Checker",
+ "Check which version of Applio is the latest to see if you need to update.": "چیک کریں کہ ایپلیو کا کون سا ورژن تازہ ترین ہے یہ دیکھنے کے لئے کہ آیا آپ کو اپ ڈیٹ کرنے کی ضرورت ہے۔",
+ "Check for updates": "اپ ڈیٹس کے لئے چیک کریں"
+}
\ No newline at end of file
diff --git a/assets/i18n/languages/vi_VI.json b/assets/i18n/languages/vi_VI.json
new file mode 100644
index 0000000000000000000000000000000000000000..9346cc9f3cf682a803a5b65a978ad5d6f77d0017
--- /dev/null
+++ b/assets/i18n/languages/vi_VI.json
@@ -0,0 +1,175 @@
+{
+ "VITS-based Voice Conversion focused on simplicity, quality and performance.": "Công cụ nhân bản giọng nói tối ưu, được tối ưu hóa tỉ mỉ cho sức mạnh vô song, tính mô-đun và trải nghiệm thân thiện với người dùng.",
+ "This section contains some extra utilities that often may be in experimental phases.": "Phần này chứa một số tiện ích bổ sung thường có thể đang trong giai đoạn thử nghiệm.",
+ "Output Information": "Thông tin đầu ra",
+ "The output information will be displayed here.": "Thông tin đầu ra sẽ được hiển thị ở đây.",
+ "Inference": "Suy luận",
+ "Train": "Xe lửa",
+ "Extra": "Phụ",
+ "Merge Audios": "Hợp nhất âm thanh",
+ "Processing": "Xử lý",
+ "Audio Analyzer": "Máy phân tích âm thanh",
+ "Model Information": "Thông tin mô hình",
+ "Plugins": "Plugin",
+ "Download": "Tải xuống",
+ "Report a Bug": "Báo cáo lỗi",
+ "Settings": "Cài đặt",
+ "Preprocess": "Tiền xử lý",
+ "Model Name": "Tên Model",
+ "Name of the new model.": "Tên của mô hình mới.",
+ "Enter model name": "Nhập tên model",
+ "Dataset Path": "Đường dẫn tập dữ liệu",
+ "Path to the dataset folder.": "Đường dẫn đến thư mục tập dữ liệu.",
+ "Refresh Datasets": "Làm mới tập dữ liệu",
+ "Dataset Creator": "Trình tạo tập dữ liệu",
+ "Dataset Name": "Tên tập dữ liệu",
+ "Name of the new dataset.": "Tên của tập dữ liệu mới.",
+ "Enter dataset name": "Nhập tên tập dữ liệu",
+ "Upload Audio Dataset": "Tải lên tập dữ liệu âm thanh",
+ "The audio file has been successfully added to the dataset. Please click the preprocess button.": "Tệp âm thanh đã được thêm thành công vào tập dữ liệu. Vui lòng nhấp vào nút xử lý trước.",
+ "Enter dataset path": "Nhập đường dẫn tập dữ liệu",
+ "Sampling Rate": "Tỷ lệ lấy mẫu",
+ "The sampling rate of the audio files.": "Tốc độ lấy mẫu của các tệp âm thanh.",
+ "Model Architecture": "Phiên bản RVC",
+ "Version of the model architecture.": "Phiên bản RVC của mô hình.",
+ "Preprocess Dataset": "Tập dữ liệu tiền xử lý",
+ "Extract": "Trích",
+ "Hop Length": "Chiều dài hop",
+ "Denotes the duration it takes for the system to transition to a significant pitch change. Smaller hop lengths require more time for inference but tend to yield higher pitch accuracy.": "Biểu thị khoảng thời gian cần thiết để hệ thống chuyển sang thay đổi cao độ đáng kể. Độ dài bước nhảy nhỏ hơn đòi hỏi nhiều thời gian hơn để suy luận nhưng có xu hướng mang lại độ chính xác cao độ cao hơn.",
+ "Batch Size": "Kích thước lô",
+ "It's advisable to align it with the available VRAM of your GPU. A setting of 4 offers improved accuracy but slower processing, while 8 provides faster and standard results.": "Bạn nên căn chỉnh nó với VRAM có sẵn của GPU của bạn. Cài đặt 4 cung cấp độ chính xác được cải thiện nhưng xử lý chậm hơn, trong khi 8 cung cấp kết quả tiêu chuẩn và nhanh hơn.",
+ "Save Every Epoch": "Lưu mọi kỷ nguyên",
+ "Determine at how many epochs the model will saved at.": "Xác định mô hình sẽ lưu tại bao nhiêu kỷ nguyên.",
+ "Total Epoch": "Tổng kỷ nguyên",
+ "Specifies the overall quantity of epochs for the model training process.": "Chỉ định số lượng tổng thể của các kỷ nguyên cho quá trình đào tạo mô hình.",
+ "Pretrained": "Đào tạo trước",
+ "Save Only Latest": "Chỉ lưu mới nhất",
+ "Enabling this setting will result in the G and D files saving only their most recent versions, effectively conserving storage space.": "Bật cài đặt này sẽ dẫn đến các tệp G và D chỉ lưu các phiên bản mới nhất của chúng, tiết kiệm hiệu quả dung lượng lưu trữ.",
+ "Save Every Weights": "Tiết kiệm mọi trọng lượng",
+ "This setting enables you to save the weights of the model at the conclusion of each epoch.": "Cài đặt này cho phép bạn lưu trọng số của mô hình khi kết thúc mỗi kỷ nguyên.",
+ "Custom Pretrained": "Đào tạo trước tùy chỉnh",
+ "Utilizing custom pretrained models can lead to superior results, as selecting the most suitable pretrained models tailored to the specific use case can significantly enhance performance.": "Sử dụng các mô hình được đào tạo trước tùy chỉnh có thể dẫn đến kết quả vượt trội, vì việc lựa chọn các mô hình được đào tạo trước phù hợp nhất phù hợp với trường hợp sử dụng cụ thể có thể nâng cao đáng kể hiệu suất.",
+ "Upload Pretrained Model": "Tải lên mô hình được đào tạo trước",
+ "Refresh Custom Pretraineds": "Làm mới Custom Pretraineds",
+ "Pretrained Custom Settings": "Cài đặt tùy chỉnh được đào tạo sẵn",
+ "The file you dropped is not a valid pretrained file. Please try again.": "Tệp bạn đã bỏ không phải là tệp được đào tạo trước hợp lệ. Vui lòng thử lại.",
+ "Click the refresh button to see the pretrained file in the dropdown menu.": "Nhấp vào nút làm mới để xem tệp được đào tạo trước trong menu thả xuống.",
+ "Pretrained G Path": "Tùy chỉnh được đào tạo trước G",
+ "Pretrained D Path": "Tùy chỉnh được đào tạo trước D",
+ "GPU Settings": "Cài đặt GPU",
+ "Sets advanced GPU settings, recommended for users with better GPU architecture.": "Đặt cài đặt GPU nâng cao, được khuyến nghị cho người dùng có kiến trúc GPU tốt hơn.",
+ "GPU Custom Settings": "Cài đặt tùy chỉnh GPU",
+ "GPU Number": "Số GPU",
+ "0 to ∞ separated by -": "0 đến ∞ cách nhau bởi -",
+ "GPU Information": "Thông tin GPU",
+ "Pitch Guidance": "Hướng dẫn quảng cáo chiêu hàng",
+ "By employing pitch guidance, it becomes feasible to mirror the intonation of the original voice, including its pitch. This feature is particularly valuable for singing and other scenarios where preserving the original melody or pitch pattern is essential.": "Bằng cách sử dụng hướng dẫn cao độ, nó trở nên khả thi để phản ánh ngữ điệu của giọng nói gốc, bao gồm cả cao độ của nó. Tính năng này đặc biệt có giá trị đối với ca hát và các tình huống khác trong đó việc giữ nguyên giai điệu hoặc cao độ ban đầu là điều cần thiết.",
+ "Utilize pretrained models when training your own. This approach reduces training duration and enhances overall quality.": "Sử dụng các mô hình được đào tạo trước khi đào tạo của riêng bạn. Cách tiếp cận này làm giảm thời gian đào tạo và nâng cao chất lượng tổng thể.",
+ "Extract Features": "Tính năng trích xuất",
+ "Start Training": "Bắt đầu đào tạo",
+ "Generate Index": "Tạo chỉ mục",
+ "Voice Model": "Mô hình giọng nói",
+ "Select the voice model to use for the conversion.": "Chọn kiểu giọng nói để sử dụng cho quá trình chuyển đổi.",
+ "Index File": "Tệp chỉ mục",
+ "Select the index file to use for the conversion.": "Chọn tệp chỉ mục để sử dụng cho quá trình chuyển đổi.",
+ "Refresh": "Làm tươi",
+ "Unload Voice": "Dỡ giọng nói",
+ "Single": "Đơn",
+ "Upload Audio": "Tải lên âm thanh",
+ "Select Audio": "Chọn Âm thanh",
+ "Select the audio to convert.": "Chọn âm thanh để chuyển đổi.",
+ "Advanced Settings": "Cài đặt nâng cao",
+ "Clear Outputs (Deletes all audios in assets/audios)": "Xóa đầu ra (Xóa tất cả âm thanh trong nội dung / âm thanh)",
+ "Custom Output Path": "Đường dẫn đầu ra tùy chỉnh",
+ "Output Path": "Đường dẫn đầu ra",
+ "The path where the output audio will be saved, by default in assets/audios/output.wav": "Đường dẫn nơi âm thanh đầu ra sẽ được lưu, theo mặc định trong tài sản / âm thanh / output.wav",
+ "Split Audio": "Tách âm thanh",
+ "Split the audio into chunks for inference to obtain better results in some cases.": "Chia âm thanh thành các phần để suy luận nhằm thu được kết quả tốt hơn trong một số trường hợp.",
+ "Autotune": "Tự động điều chỉnh",
+ "Apply a soft autotune to your inferences, recommended for singing conversions.": "Áp dụng autotune mềm cho suy luận của bạn, được đề xuất để chuyển đổi ca hát.",
+ "Clean Audio": "Âm thanh sạch",
+ "Clean your audio output using noise detection algorithms, recommended for speaking audios.": "Làm sạch đầu ra âm thanh của bạn bằng các thuật toán phát hiện tiếng ồn, được khuyến nghị để nói âm thanh.",
+ "Clean Strength": "Sức mạnh sạch",
+ "Set the clean-up level to the audio you want, the more you increase it the more it will clean up, but it is possible that the audio will be more compressed.": "Đặt mức dọn dẹp thành âm thanh bạn muốn, bạn càng tăng thì càng dọn dẹp, nhưng có thể âm thanh sẽ bị nén nhiều hơn.",
+ "Pitch": "Sân",
+ "Set the pitch of the audio, the higher the value, the higher the pitch.": "Đặt cao độ của âm thanh, giá trị càng cao, cao độ càng cao.",
+ "Filter Radius": "Bán kính lọc",
+ "If the number is greater than or equal to three, employing median filtering on the collected tone results has the potential to decrease respiration.": "Nếu số lượng lớn hơn hoặc bằng ba, việc sử dụng bộ lọc trung bình trên kết quả âm thu thập được có khả năng làm giảm hô hấp.",
+ "Search Feature Ratio": "Tỷ lệ tính năng tìm kiếm",
+ "Influence exerted by the index file; a higher value corresponds to greater influence. However, opting for lower values can help mitigate artifacts present in the audio.": "Ảnh hưởng tác động của tệp chỉ mục; Giá trị cao hơn tương ứng với ảnh hưởng lớn hơn. Tuy nhiên, việc chọn các giá trị thấp hơn có thể giúp giảm thiểu các hiện vật có trong âm thanh.",
+ "Volume Envelope": "Phong bì khối lượng",
+ "Substitute or blend with the volume envelope of the output. The closer the ratio is to 1, the more the output envelope is employed.": "Thay thế hoặc trộn với phong bì âm lượng của đầu ra. Tỷ lệ càng gần 1, phong bì đầu ra càng được sử dụng.",
+ "Protect Voiceless Consonants": "Bảo vệ phụ âm vô thanh",
+ "Safeguard distinct consonants and breathing sounds to prevent electro-acoustic tearing and other artifacts. Pulling the parameter to its maximum value of 0.5 offers comprehensive protection. However, reducing this value might decrease the extent of protection while potentially mitigating the indexing effect.": "Bảo vệ các phụ âm riêng biệt và âm thanh thở để ngăn ngừa rách âm thanh điện và các hiện vật khác. Kéo tham số đến giá trị tối đa 0, 5 cung cấp sự bảo vệ toàn diện. Tuy nhiên, việc giảm giá trị này có thể làm giảm mức độ bảo vệ trong khi có khả năng giảm thiểu hiệu ứng lập chỉ mục.",
+ "Pitch extraction algorithm": "Thuật toán trích xuất cao độ",
+ "Pitch extraction algorithm to use for the audio conversion. The default algorithm is rmvpe, which is recommended for most cases.": "Thuật toán trích xuất cao độ để sử dụng cho việc chuyển đổi âm thanh. Thuật toán mặc định là rmvpe, được khuyến nghị cho hầu hết các trường hợp.",
+ "Convert": "Convert",
+ "Export Audio": "Xuất âm thanh",
+ "Batch": "Mẻ",
+ "Input Folder": "Thư mục đầu vào",
+ "Select the folder containing the audios to convert.": "Chọn thư mục chứa âm thanh để chuyển đổi.",
+ "Enter input path": "Nhập đường dẫn nhập liệu",
+ "Output Folder": "Thư mục đầu ra",
+ "Select the folder where the output audios will be saved.": "Chọn thư mục lưu âm thanh đầu ra.",
+ "Enter output path": "Nhập đường dẫn đầu ra",
+ "Get information about the audio": "Nhận thông tin về âm thanh",
+ "Information about the audio file": "Thông tin về tệp âm thanh",
+ "Waiting for information...": "Đang chờ thông tin...",
+ "## Voice Blender": "## Máy xay sinh tố giọng nói",
+ "Select two voice models, set your desired blend percentage, and blend them into an entirely new voice.": "Chọn hai mẫu giọng nói, đặt tỷ lệ phần trăm pha trộn mong muốn của bạn và trộn chúng thành một giọng nói hoàn toàn mới.",
+ "Voice Blender": "Máy xay sinh tố giọng nói",
+ "Drag and drop your model here": "Kéo và thả mô hình của bạn vào đây",
+ "You can also use a custom path.": "Bạn cũng có thể sử dụng đường dẫn tùy chỉnh.",
+ "Blend Ratio": "Tỷ lệ pha trộn",
+ "Adjusting the position more towards one side or the other will make the model more similar to the first or second.": "Điều chỉnh vị trí nhiều hơn về phía bên này hay bên kia sẽ làm cho mô hình giống với thứ nhất hoặc thứ hai hơn.",
+ "Fusion": "Fusion",
+ "Path to Model": "Đường dẫn đến mô hình",
+ "Enter path to model": "Nhập đường dẫn đến mô hình",
+ "Model information to be placed": "Thông tin mô hình sẽ được đặt",
+ "Inroduce the model information": "Giới thiệu thông tin mô hình",
+ "The information to be placed in the model (You can leave it blank or put anything).": "Thông tin được đặt trong mô hình (Bạn có thể để trống hoặc đặt bất cứ thứ gì).",
+ "View model information": "Xem thông tin mô hình",
+ "Introduce the model pth path": "Giới thiệu mô hình pth path",
+ "View": "Cảnh",
+ "Model extraction": "Trích xuất mô hình",
+ "Model conversion": "Chuyển đổi mô hình",
+ "Pth file": "Tệp Pth",
+ "Output of the pth file": "Đầu ra của tệp pth",
+ "# How to Report an Issue on GitHub": "# Cách báo cáo sự cố trên GitHub",
+ "1. Click on the 'Record Screen' button below to start recording the issue you are experiencing.": "1. Nhấp vào nút 'Ghi lại màn hình' bên dưới để bắt đầu ghi lại sự cố bạn đang gặp phải.",
+ "2. Once you have finished recording the issue, click on the 'Stop Recording' button (the same button, but the label changes depending on whether you are actively recording or not).": "2. Khi bạn đã ghi xong sự cố, hãy nhấp vào nút 'Dừng ghi' (cùng một nút, nhưng nhãn thay đổi tùy thuộc vào việc bạn có chủ động ghi hay không).",
+ "3. Go to [GitHub Issues](https://github.com/IAHispano/Applio/issues) and click on the 'New Issue' button.": "3. Đi tới [Vấn đề GitHub] (https://github.com/IAHispano/Applio/issues) và nhấp vào nút 'Vấn đề mới'.",
+ "4. Complete the provided issue template, ensuring to include details as needed, and utilize the assets section to upload the recorded file from the previous step.": "4. Hoàn thành mẫu vấn đề được cung cấp, đảm bảo bao gồm các chi tiết khi cần thiết và sử dụng phần tài sản để tải lên tệp đã ghi từ bước trước.",
+ "Record Screen": "Ghi lại màn hình",
+ "Record": "Ghi",
+ "Stop Recording": "Dừng ghi",
+ "Introduce the model .pth path": "Giới thiệu mô hình đường dẫn .pth",
+ "See Model Information": "Xem thông tin mô hình",
+ "## Download Model": "## Tải xuống mô hình",
+ "Model Link": "Liên kết mô hình",
+ "Introduce the model link": "Giới thiệu link mô hình",
+ "Download Model": "Tải xuống mô hình",
+ "## Drop files": "## Thả tệp",
+ "Drag your .pth file and .index file into this space. Drag one and then the other.": "Kéo tệp .pth và tệp .index của bạn vào không gian này. Kéo cái này rồi cái kia.",
+ "TTS Voices": "Tiếng nói TTS",
+ "Select the TTS voice to use for the conversion.": "Chọn giọng nói TTS để sử dụng cho quá trình chuyển đổi.",
+ "Text to Synthesize": "Văn bản để tổng hợp",
+ "Enter the text to synthesize.": "Nhập văn bản để tổng hợp.",
+ "Or you can upload a .txt file": "Hoặc bạn có thể tải lên tệp .txt",
+ "Enter text to synthesize": "Nhập văn bản để tổng hợp",
+ "Output Path for TTS Audio": "Đường dẫn đầu ra cho âm thanh TTS",
+ "Output Path for RVC Audio": "Đường dẫn đầu ra cho âm thanh RVC",
+ "Enable Applio integration with Discord presence": "Bật tích hợp Applio với sự hiện diện của Discord",
+ "It will activate the possibility of displaying the current Applio activity in Discord.": "Nó sẽ kích hoạt khả năng hiển thị hoạt động Applio hiện tại trong Discord.",
+ "Enable Applio integration with applio.org/models using flask": "Bật tích hợp Applio với applio.org/models bằng bình",
+ "It will activate the possibility of downloading models with a click from the website.": "Nó sẽ kích hoạt khả năng tải xuống các mô hình bằng một cú nhấp chuột từ trang web.",
+ "Theme": "Đề tài",
+ "Select the theme you want to use. (Requires restarting Applio)": "Chọn chủ đề bạn muốn sử dụng. (Yêu cầu khởi động lại Applio)",
+ "Language": "Ngôn ngữ",
+ "Select the language you want to use. (Requires restarting Applio)": "Chọn ngôn ngữ bạn muốn sử dụng. (Yêu cầu khởi động lại Applio)",
+ "Plugin Installer": "Trình cài đặt plugin",
+ "Drag your plugin.zip to install it": "Kéo plugin.zip của bạn để cài đặt nó",
+ "Version Checker": "Trình kiểm tra phiên bản",
+ "Check which version of Applio is the latest to see if you need to update.": "Kiểm tra xem phiên bản Applio nào là phiên bản mới nhất để xem bạn có cần cập nhật hay không.",
+ "Check for updates": "Kiểm tra bản cập nhật"
+}
\ No newline at end of file
diff --git a/assets/i18n/languages/wu_WU.json b/assets/i18n/languages/wu_WU.json
new file mode 100644
index 0000000000000000000000000000000000000000..9e26dfeeb6e641a33dae4961196235bdb965b21b
--- /dev/null
+++ b/assets/i18n/languages/wu_WU.json
@@ -0,0 +1 @@
+{}
\ No newline at end of file
diff --git a/assets/i18n/languages/zh_CN.json b/assets/i18n/languages/zh_CN.json
new file mode 100644
index 0000000000000000000000000000000000000000..329dfc5cfac56240bb1a549a57a89ae681c9cbdf
--- /dev/null
+++ b/assets/i18n/languages/zh_CN.json
@@ -0,0 +1,113 @@
+{
+ "VITS-based Voice Conversion focused on simplicity, quality and performance.": "终极语音克隆工具,经过精心优化,具有无与伦比的功能、模块化和用户友好的体验。",
+ "This section contains some extra utilities that often may be in experimental phases.": "本节包含一些额外的实用程序,这些实用程序通常处于实验阶段。",
+ "Output Information": "输出信息",
+ "Inference": "推理",
+ "Train": "火车",
+ "Extra": "额外",
+ "Merge Audios": "合并音频",
+ "Processing": "加工",
+ "Audio Analyzer": "音频分析仪",
+ "Model Information": "型号信息",
+ "Download": "下载",
+ "Report a Bug": "报告错误",
+ "Preprocess": "预处理",
+ "Model Name": "型号名称",
+ "Enter model name": "输入型号名称",
+ "Dataset Path": "数据集路径",
+ "Enter dataset path": "输入数据集路径",
+ "Sampling Rate": "采样率",
+ "Model Architecture": "RVC 版本",
+ "Preprocess Dataset": "预处理数据集",
+ "Extract": "提取",
+ "Hop Length": "跳跃长度",
+ "Batch Size": "批量大小",
+ "Save Every Epoch": "保存每个纪元",
+ "Total Epoch": "总纪元",
+ "Pretrained": "预训练",
+ "Save Only Latest": "仅保存最新",
+ "Save Every Weights": "节省每一次砝码",
+ "Custom Pretrained": "自定义预训练",
+ "Upload Pretrained Model": "上传预训练模型",
+ "Pretrained Custom Settings": "预训练的自定义设置",
+ "The file you dropped is not a valid pretrained file. Please try again.": "您删除的文件不是有效的预训练文件。请再试一次。",
+ "Click the refresh button to see the pretrained file in the dropdown menu.": "单击刷新按钮,在下拉菜单中查看预训练文件。",
+ "Pretrained G Path": "自定义预训练 G",
+ "Pretrained D Path": "自定义预训练 D",
+ "GPU Settings": "GPU 设置",
+ "GPU Custom Settings": "GPU 自定义设置",
+ "GPU Number": "GPU 数量",
+ "0 to ∞ separated by -": "0 到 ∞ 之间用 -",
+ "GPU Information": "GPU 信息",
+ "Pitch Guidance": "音高指导",
+ "Extract Features": "提取特征",
+ "Start Training": "开始训练",
+ "Generate Index": "生成索引",
+ "Voice Model": "语音模型",
+ "Index File": "Index 文件",
+ "Refresh": "刷新",
+ "Unload Voice": "卸载语音",
+ "Single": "单",
+ "Upload Audio": "上传音频",
+ "Select Audio": "选择音频",
+ "Advanced Settings": "高级设置",
+ "Clear Outputs (Deletes all audios in assets/audios)": "清除输出(删除资产/音频中的所有音频)",
+ "Custom Output Path": "自定义输出路径",
+ "Output Path": "输出路径",
+ "Pitch": "投",
+ "If >=3: apply median filtering to the harvested pitch results. The value represents the filter radius and can reduce breathiness": "如果 >=3:对收获的音高结果应用中值滤波。该值表示过滤器半径,可以减少呼吸",
+ "Search Feature Ratio": "搜索特征比率",
+ "Pitch extraction algorithm": "音高提取算法",
+ "Convert": "转换",
+ "Export Audio": "导出音频",
+ "Batch": "批",
+ "Input Folder": "输入文件夹",
+ "Enter input path": "输入输入路径",
+ "Output Folder": "输出文件夹",
+ "Enter output path": "输入输出路径",
+ "Get information about the audio": "获取有关音频的信息",
+ "Information about the audio file": "有关音频文件的信息",
+ "Waiting for information...": "等待信息...",
+ "Model fusion": "模型融合",
+ "Weight for Model A": "A型重量",
+ "Whether the model has pitch guidance": "模型是否具有俯仰引导",
+ "Model architecture version": "模型架构版本",
+ "Path to Model A": "模型 A 的路径",
+ "Path to Model B": "模型 B 的路径",
+ "Path to model": "模型路径",
+ "Model information to be placed": "要放置的模型信息",
+ "Fusion": "融合",
+ "Modify model information": "修改模型信息",
+ "Path to Model": "模型路径",
+ "Model information to be modified": "要修改的模型信息",
+ "Save file name": "保存文件名",
+ "Modify": "修改",
+ "View model information": "查看型号信息",
+ "View": "视图",
+ "Model extraction": "模型提取",
+ "Model conversion": "模型转换",
+ "Pth file": "Pth 文件",
+ "Output of the pth file": "pth 文件的输出",
+ "# How to Report an Issue on GitHub": "# 如何在 GitHub 上报告问题",
+ "1. Click on the 'Record Screen' button below to start recording the issue you are experiencing.": "1.单击下面的“录制屏幕”按钮开始记录您遇到的问题。",
+ "2. Once you have finished recording the issue, click on the 'Stop Recording' button (the same button, but the label changes depending on whether you are actively recording or not).": "2. 录制完问题后,单击“停止录制”按钮(相同的按钮,但标签会根据您是否正在录制而变化)。",
+ "3. Go to [GitHub Issues](https://github.com/IAHispano/Applio/issues) and click on the 'New Issue' button.": "3. 转到 [GitHub 问题](https://github.com/IAHispano/Applio/issues),然后单击“新问题”按钮。",
+ "4. Complete the provided issue template, ensuring to include details as needed, and utilize the assets section to upload the recorded file from the previous step.": "4. 填写提供的问题模板,确保根据需要包含详细信息,并利用资产部分上传上一步的记录文件。",
+ "Record Screen": "录制屏幕",
+ "Record": "记录",
+ "Stop Recording": "停止录制",
+ "Introduce the model .pth path": "引入模型 .pth 路径",
+ "See Model Information": "查看型号信息",
+ "## Download Model": "## 下载模型",
+ "Model Link": "模型链接",
+ "Introduce the model link": "介绍模型链接",
+ "Download Model": "下载模型",
+ "## Drop files": "## 删除文件",
+ "Drag your .pth file and .index file into this space. Drag one and then the other.": "将 .pth 文件和 .index 文件拖到此空间中。拖动一个,然后拖动另一个。",
+ "TTS Voices": "TTS语音",
+ "Text to Synthesize": "要合成的文本",
+ "Enter text to synthesize": "输入要合成的文本",
+ "Output Path for TTS Audio": "TTS 音频的输出路径",
+ "Output Path for RVC Audio": "RVC 音频的输出路径",
+ "Enable Applio integration with Discord presence": "Applio 存在"
+}
diff --git a/assets/i18n/scan.py b/assets/i18n/scan.py
new file mode 100644
index 0000000000000000000000000000000000000000..c1de0f74e55962e914420d395776733ba1e6fe61
--- /dev/null
+++ b/assets/i18n/scan.py
@@ -0,0 +1,71 @@
+import ast
+import json
+from pathlib import Path
+from collections import OrderedDict
+
+
+def extract_i18n_strings(node):
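+    """Recursively collect string literals passed to i18n(...) calls in an AST."""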
+    i18n_strings = []
+
+    if (
+        isinstance(node, ast.Call)
+        and isinstance(node.func, ast.Name)
+        and node.func.id == "i18n"
+    ):
+        for arg in node.args:
+            # ast.Str is deprecated; string literals are ast.Constant nodes.
+            if isinstance(arg, ast.Constant) and isinstance(arg.value, str):
+                i18n_strings.append(arg.value)
+
+    for child_node in ast.iter_child_nodes(node):
+        i18n_strings.extend(extract_i18n_strings(child_node))
+
+    return i18n_strings
+
+
+def process_file(file_path):
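+    """Return the i18n keys used in a Python file; files that never mention I18nAuto are skipped."""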
+ with open(file_path, "r", encoding="utf8") as file:
+ code = file.read()
+ if "I18nAuto" in code:
+ tree = ast.parse(code)
+ i18n_strings = extract_i18n_strings(tree)
+ print(file_path, len(i18n_strings))
+ return i18n_strings
+ return []
+
+
+# Use pathlib for file handling
+py_files = Path(".").rglob("*.py")
+
+# Use a set to store unique strings
+code_keys = set()
+
+for py_file in py_files:
+    strings = process_file(py_file)
+    code_keys.update(strings)
+
+print()
+print("Total unique:", len(code_keys))
+
+standard_file = "languages/en_US.json"
+with open(standard_file, "r", encoding="utf-8") as file:
+ standard_data = json.load(file, object_pairs_hook=OrderedDict)
+standard_keys = set(standard_data.keys())
+
+# Combine unused and missing keys sections
+unused_keys = standard_keys - code_keys
+missing_keys = code_keys - standard_keys
+
+print("Unused keys:", len(unused_keys))
+for unused_key in unused_keys:
+    print("\t", unused_key)
+
+print("Missing keys:", len(missing_keys))
+for missing_key in missing_keys:
+    print("\t", missing_key)
+
+code_keys_dict = OrderedDict((s, s) for s in code_keys)
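+# en_US.json is regenerated as an identity map (each key maps to itself);
+# the other locale files override the values with their translations.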
+
+# Use context manager for writing back to the file
+with open(standard_file, "w", encoding="utf-8") as file:
+ json.dump(code_keys_dict, file, ensure_ascii=False, indent=4, sort_keys=True)
+ file.write("\n")
diff --git a/assets/installation_checker.py b/assets/installation_checker.py
new file mode 100644
index 0000000000000000000000000000000000000000..2d665a509ca7a43e43b98b6e0ff785628eeb313a
--- /dev/null
+++ b/assets/installation_checker.py
@@ -0,0 +1,38 @@
+import sys
+import os
+
+now_dir = os.getcwd()
+sys.path.append(now_dir)
+
+
+class InstallationError(Exception):
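+    """Raised when Applio is installed in an unsupported location."""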
+ def __init__(self, message="InstallationError"):
+ self.message = message
+ super().__init__(self.message)
+
+
+def check_installation():
+    # On Windows, Applio must live on the system drive; os.getenv("SystemDrive")
+    # is unset on Linux/macOS, so the drive check is skipped there.
+    system_drive = os.getenv("SystemDrive")
+    current_drive = os.path.splitdrive(now_dir)[0]
+    if system_drive and current_drive.upper() != system_drive.upper():
+        raise InstallationError(
+            f"Installation Error: The current working directory is on drive {current_drive}, but the default system drive is {system_drive}. Please move Applio to the {system_drive} drive."
+        )
+    if "OneDrive" in now_dir:
+        raise InstallationError(
+            "Installation Error: The current working directory is located in OneDrive. Please move Applio to a different folder."
+        )
+    if " " in now_dir:
+        raise InstallationError(
+            "Installation Error: The current working directory contains spaces. Please move Applio to a folder without spaces in its path."
+        )
+    try:
+        now_dir.encode("ascii")
+    except UnicodeEncodeError:
+        raise InstallationError(
+            "Installation Error: The current working directory contains non-ASCII characters. Please move Applio to a folder with only ASCII characters in its path."
+        )
diff --git a/assets/presets/Default.json b/assets/presets/Default.json
new file mode 100644
index 0000000000000000000000000000000000000000..9f94448ca5fefd38a6b9761c6ca1e031c242259a
--- /dev/null
+++ b/assets/presets/Default.json
@@ -0,0 +1,7 @@
+{
+ "pitch": 0,
+ "filter_radius": 3,
+ "index_rate": 0.75,
+ "rms_mix_rate": 1,
+ "protect": 0.5
+}
\ No newline at end of file
diff --git a/assets/presets/Good for Anything.json b/assets/presets/Good for Anything.json
new file mode 100644
index 0000000000000000000000000000000000000000..8c6d878a0b8de2a5d808c84e0183cc5812b4c751
--- /dev/null
+++ b/assets/presets/Good for Anything.json
@@ -0,0 +1,7 @@
+{
+ "pitch": 0,
+ "filter_radius": 3,
+ "index_rate": 0.75,
+ "rms_mix_rate": 0.3,
+ "protect": 0.33
+}
\ No newline at end of file
diff --git a/assets/presets/Music.json b/assets/presets/Music.json
new file mode 100644
index 0000000000000000000000000000000000000000..6ac821bb48d00013df68be3df18a84145de644db
--- /dev/null
+++ b/assets/presets/Music.json
@@ -0,0 +1,7 @@
+{
+ "pitch": 0,
+ "filter_radius": 3,
+ "index_rate": 0.75,
+ "rms_mix_rate": 0.25,
+ "protect": 0.33
+}
\ No newline at end of file
diff --git a/assets/themes/Applio.py b/assets/themes/Applio.py
new file mode 100644
index 0000000000000000000000000000000000000000..e605ee11ce0a9a03e1bf6ed2e8e7e09dd8bb2c06
--- /dev/null
+++ b/assets/themes/Applio.py
@@ -0,0 +1,284 @@
+from __future__ import annotations
+
+from typing import Iterable
+import gradio as gr
+
+# gr.themes.builder()
+from gradio.themes.base import Base
+from gradio.themes.utils import colors, fonts, sizes
+
+
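+# Usage sketch (an assumption; Applio wires themes up via assets/themes/loadThemes.py):
+#   with gr.Blocks(theme=Applio()) as app: ...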
+class Applio(Base):
+ def __init__(
+ self,
+ *,
+ primary_hue: colors.Color | str = colors.neutral,
+ secondary_hue: colors.Color | str = colors.neutral,
+ neutral_hue: colors.Color | str = colors.neutral,
+ spacing_size: sizes.Size | str = sizes.spacing_md,
+ radius_size: sizes.Size | str = sizes.radius_md,
+ text_size: sizes.Size | str = sizes.text_lg,
+ font: fonts.Font | str | Iterable[fonts.Font | str] = (
+ "Syne V",
+ fonts.GoogleFont("Syne"),
+ "ui-sans-serif",
+ "system-ui",
+ ),
+ font_mono: fonts.Font | str | Iterable[fonts.Font | str] = (
+ "ui-monospace",
+ fonts.GoogleFont("Nunito Sans"),
+ ),
+ ):
+ super().__init__(
+ primary_hue=primary_hue,
+ secondary_hue=secondary_hue,
+ neutral_hue=neutral_hue,
+ spacing_size=spacing_size,
+ radius_size=radius_size,
+ text_size=text_size,
+ font=font,
+ font_mono=font_mono,
+ )
+        self.name = "Applio"
+        self.secondary_100 = "#dbeafe"
+        self.secondary_200 = "#bfdbfe"
+        self.secondary_300 = "#93c5fd"
+        self.secondary_400 = "#60a5fa"
+        self.secondary_50 = "#eff6ff"
+        self.secondary_500 = "#3b82f6"
+        self.secondary_600 = "#2563eb"
+        self.secondary_700 = "#1d4ed8"
+        self.secondary_800 = "#1e40af"
+        self.secondary_900 = "#1e3a8a"
+        self.secondary_950 = "#1d3660"
+
+ super().set(
+ # Blaise
+ background_fill_primary="#110F0F",
+ background_fill_primary_dark="#110F0F",
+ background_fill_secondary="#110F0F",
+ background_fill_secondary_dark="#110F0F",
+ block_background_fill="*neutral_800",
+ block_background_fill_dark="*neutral_800",
+ block_border_color="*border_color_primary",
+ block_border_color_dark="*border_color_primary",
+ block_border_width="1px",
+ block_border_width_dark="1px",
+ block_info_text_color="*body_text_color_subdued",
+ block_info_text_color_dark="*body_text_color_subdued",
+ block_info_text_size="*text_sm",
+ block_info_text_weight="400",
+ block_label_background_fill="*background_fill_primary",
+ block_label_background_fill_dark="*background_fill_secondary",
+ block_label_border_color="*border_color_primary",
+ block_label_border_color_dark="*border_color_primary",
+ block_label_border_width="1px",
+ block_label_border_width_dark="1px",
+ block_label_margin="0",
+ block_label_padding="*spacing_sm *spacing_lg",
+ block_label_radius="calc(*radius_lg - 1px) 0 calc(*radius_lg - 1px) 0",
+ block_label_right_radius="0 calc(*radius_lg - 1px) 0 calc(*radius_lg - 1px)",
+ block_label_shadow="*block_shadow",
+            block_label_text_color="#110F0F",
+            block_label_text_color_dark="#110F0F",
+ block_label_text_weight="400",
+ block_padding="*spacing_xl",
+ block_radius="*radius_md",
+ block_shadow="none",
+ block_shadow_dark="none",
+ block_title_background_fill="rgb(255,255,255)",
+ block_title_background_fill_dark="rgb(255,255,255)",
+ block_title_border_color="none",
+ block_title_border_color_dark="none",
+ block_title_border_width="0px",
+ block_title_padding="*block_label_padding",
+ block_title_radius="*block_label_radius",
+ block_title_text_color="#110F0F",
+ block_title_text_color_dark="#110F0F",
+ block_title_text_size="*text_md",
+ block_title_text_weight="600",
+ body_background_fill="#110F0F",
+ body_background_fill_dark="#110F0F",
+ body_text_color="white",
+ body_text_color_dark="white",
+ body_text_color_subdued="*neutral_400",
+ body_text_color_subdued_dark="*neutral_400",
+ body_text_size="*text_md",
+ body_text_weight="400",
+ border_color_accent="*neutral_600",
+ border_color_accent_dark="*neutral_600",
+ border_color_primary="*neutral_800",
+ border_color_primary_dark="*neutral_800",
+ button_border_width="*input_border_width",
+ button_border_width_dark="*input_border_width",
+ button_cancel_background_fill="*button_secondary_background_fill",
+ button_cancel_background_fill_dark="*button_secondary_background_fill",
+ button_cancel_background_fill_hover="*button_cancel_background_fill",
+ button_cancel_background_fill_hover_dark="*button_cancel_background_fill",
+ button_cancel_border_color="*button_secondary_border_color",
+ button_cancel_border_color_dark="*button_secondary_border_color",
+ button_cancel_border_color_hover="*button_cancel_border_color",
+ button_cancel_border_color_hover_dark="*button_cancel_border_color",
+ button_cancel_text_color="#110F0F",
+ button_cancel_text_color_dark="#110F0F",
+ button_cancel_text_color_hover="#110F0F",
+ button_cancel_text_color_hover_dark="#110F0F",
+ button_large_padding="*spacing_lg calc(2 * *spacing_lg)",
+ button_large_radius="*radius_lg",
+ button_large_text_size="*text_lg",
+ button_large_text_weight="600",
+ button_primary_background_fill="*primary_600",
+ button_primary_background_fill_dark="*primary_600",
+ button_primary_background_fill_hover="*primary_500",
+ button_primary_background_fill_hover_dark="*primary_500",
+ button_primary_border_color="*primary_500",
+ button_primary_border_color_dark="*primary_500",
+ button_primary_border_color_hover="*primary_400",
+ button_primary_border_color_hover_dark="*primary_400",
+ button_primary_text_color="white",
+ button_primary_text_color_dark="white",
+ button_primary_text_color_hover="#110F0F",
+ button_primary_text_color_hover_dark="#110F0F",
+ button_secondary_background_fill="transparent",
+ button_secondary_background_fill_dark="transparent",
+ button_secondary_background_fill_hover="*neutral_800",
+ button_secondary_background_fill_hover_dark="*neutral_800",
+ button_secondary_border_color="*neutral_700",
+ button_secondary_border_color_dark="*neutral_700",
+ button_secondary_border_color_hover="*neutral_600",
+ button_secondary_border_color_hover_dark="*neutral_600",
+ button_secondary_text_color="white",
+ button_secondary_text_color_dark="white",
+ button_secondary_text_color_hover="*button_secondary_text_color",
+ button_secondary_text_color_hover_dark="*button_secondary_text_color",
+ button_shadow="none",
+ button_shadow_active="*shadow_inset",
+ button_shadow_hover="none",
+ button_small_padding="*spacing_sm calc(2 * *spacing_sm)",
+ button_small_radius="*radius_lg",
+ button_small_text_size="*text_md",
+ button_small_text_weight="400",
+ button_transition="0.3s ease all",
+ checkbox_background_color="*neutral_700",
+ checkbox_background_color_dark="*neutral_700",
+ checkbox_background_color_focus="*checkbox_background_color",
+ checkbox_background_color_focus_dark="*checkbox_background_color",
+ checkbox_background_color_hover="*checkbox_background_color",
+ checkbox_background_color_hover_dark="*checkbox_background_color",
+ checkbox_background_color_selected="*secondary_600",
+ checkbox_background_color_selected_dark="*secondary_600",
+ checkbox_border_color="*neutral_700",
+ checkbox_border_color_dark="*neutral_700",
+ checkbox_border_color_focus="*secondary_500",
+ checkbox_border_color_focus_dark="*secondary_500",
+ checkbox_border_color_hover="*neutral_600",
+ checkbox_border_color_hover_dark="*neutral_600",
+ checkbox_border_color_selected="*secondary_600",
+ checkbox_border_color_selected_dark="*secondary_600",
+ checkbox_border_radius="*radius_sm",
+ checkbox_border_width="*input_border_width",
+ checkbox_border_width_dark="*input_border_width",
+ checkbox_check="url(\"data:image/svg+xml,%3csvg viewBox='0 0 16 16' fill='white' xmlns='http://www.w3.org/2000/svg'%3e%3cpath d='M12.207 4.793a1 1 0 010 1.414l-5 5a1 1 0 01-1.414 0l-2-2a1 1 0 011.414-1.414L6.5 9.086l4.293-4.293a1 1 0 011.414 0z'/%3e%3c/svg%3e\")",
+ checkbox_label_background_fill="transparent",
+ checkbox_label_background_fill_dark="transparent",
+ checkbox_label_background_fill_hover="transparent",
+ checkbox_label_background_fill_hover_dark="transparent",
+ checkbox_label_background_fill_selected="transparent",
+ checkbox_label_background_fill_selected_dark="transparent",
+ checkbox_label_border_color="transparent",
+ checkbox_label_border_color_dark="transparent",
+ checkbox_label_border_color_hover="transparent",
+ checkbox_label_border_color_hover_dark="transparent",
+            checkbox_label_border_width="0px",
+            checkbox_label_border_width_dark="0px",
+ checkbox_label_gap="*spacing_lg",
+ checkbox_label_padding="*spacing_md calc(2 * *spacing_md)",
+ checkbox_label_shadow="none",
+ checkbox_label_text_color="*body_text_color",
+ checkbox_label_text_color_dark="*body_text_color",
+ checkbox_label_text_color_selected="*checkbox_label_text_color",
+ checkbox_label_text_color_selected_dark="*checkbox_label_text_color",
+ checkbox_label_text_size="*text_md",
+ checkbox_label_text_weight="400",
+ checkbox_shadow="*input_shadow",
+ color_accent="*primary_500",
+ color_accent_soft="*primary_50",
+ color_accent_soft_dark="*neutral_700",
+ container_radius="*radius_xl",
+ embed_radius="*radius_lg",
+ error_background_fill="*background_fill_primary",
+ error_background_fill_dark="*background_fill_primary",
+ error_border_color="*border_color_primary",
+ error_border_color_dark="*border_color_primary",
+ error_border_width="1px",
+ error_border_width_dark="1px",
+ error_text_color="#ef4444",
+ error_text_color_dark="#ef4444",
+ form_gap_width="0px",
+ input_background_fill="*neutral_900",
+ input_background_fill_dark="*neutral_900",
+ input_background_fill_focus="*secondary_600",
+ input_background_fill_focus_dark="*secondary_600",
+ input_background_fill_hover="*input_background_fill",
+ input_background_fill_hover_dark="*input_background_fill",
+ input_border_color="*neutral_700",
+ input_border_color_dark="*neutral_700",
+ input_border_color_focus="*secondary_600",
+ input_border_color_focus_dark="*primary_600",
+ input_border_color_hover="*input_border_color",
+ input_border_color_hover_dark="*input_border_color",
+ input_border_width="1px",
+ input_border_width_dark="1px",
+ input_padding="*spacing_xl",
+ input_placeholder_color="*neutral_500",
+ input_placeholder_color_dark="*neutral_500",
+ input_radius="*radius_lg",
+ input_shadow="none",
+ input_shadow_dark="none",
+ input_shadow_focus="*input_shadow",
+ input_shadow_focus_dark="*input_shadow",
+ input_text_size="*text_md",
+ input_text_weight="400",
+ layout_gap="*spacing_xxl",
+ link_text_color="*secondary_500",
+ link_text_color_active="*secondary_500",
+ link_text_color_active_dark="*secondary_500",
+ link_text_color_dark="*secondary_500",
+ link_text_color_hover="*secondary_400",
+ link_text_color_hover_dark="*secondary_400",
+ link_text_color_visited="*secondary_600",
+ link_text_color_visited_dark="*secondary_600",
+ loader_color="*color_accent",
+ loader_color_dark="*color_accent",
+ panel_background_fill="*background_fill_secondary",
+ panel_background_fill_dark="*background_fill_secondary",
+ panel_border_color="*border_color_primary",
+ panel_border_color_dark="*border_color_primary",
+ panel_border_width="1px",
+ panel_border_width_dark="1px",
+ prose_header_text_weight="600",
+ prose_text_size="*text_md",
+ prose_text_weight="400",
+ radio_circle="url(\"data:image/svg+xml,%3csvg viewBox='0 0 16 16' fill='white' xmlns='http://www.w3.org/2000/svg'%3e%3ccircle cx='8' cy='8' r='3'/%3e%3c/svg%3e\")",
+ section_header_text_size="*text_md",
+ section_header_text_weight="400",
+ shadow_drop="rgba(0,0,0,0.05) 0px 1px 2px 0px",
+ shadow_drop_lg="0 1px 3px 0 rgb(0 0 0 / 0.1), 0 1px 2px -1px rgb(0 0 0 / 0.1)",
+ shadow_inset="rgba(0,0,0,0.05) 0px 2px 4px 0px inset",
+ shadow_spread="3px",
+ shadow_spread_dark="1px",
+ slider_color="#9E9E9E",
+ slider_color_dark="#9E9E9E",
+ stat_background_fill="*primary_500",
+ stat_background_fill_dark="*primary_500",
+ table_border_color="*neutral_700",
+ table_border_color_dark="*neutral_700",
+ table_even_background_fill="*neutral_950",
+ table_even_background_fill_dark="*neutral_950",
+ table_odd_background_fill="*neutral_900",
+ table_odd_background_fill_dark="*neutral_900",
+ table_radius="*radius_lg",
+ table_row_focus="*color_accent_soft",
+ table_row_focus_dark="*color_accent_soft",
+ )
diff --git a/assets/themes/loadThemes.py b/assets/themes/loadThemes.py
new file mode 100644
index 0000000000000000000000000000000000000000..fcea2c9fbb8164f68776833e09123e0657a1bb79
--- /dev/null
+++ b/assets/themes/loadThemes.py
@@ -0,0 +1,122 @@
+import json
+import os
+import importlib
+import gradio as gr
+
+now_dir = os.getcwd()
+
+folder = os.path.dirname(os.path.abspath(__file__))
+folder = os.path.dirname(folder)
+folder = os.path.dirname(folder)
+folder = os.path.join(folder, "assets", "themes")
+config_file = os.path.join(now_dir, "assets", "config.json")
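+# Expected shape of the theme block in assets/config.json, inferred from the
+# reads and writes below: {"theme": {"file": "Applio.py", "class": "Applio"}}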
+
+import sys
+
+sys.path.append(folder)
+
+
+def get_class(filename):
+    with open(filename, "r", encoding="utf8") as file:
+        for line in file:
+            if "class " in line:
+                return line.split("class ")[1].split(":")[0].split("(")[0].strip()
+    return None
+
+
+def get_list():
+
+ themes_from_files = [
+ os.path.splitext(name)[0]
+ for root, _, files in os.walk(folder, topdown=False)
+ for name in files
+ if name.endswith(".py") and root == folder
+ ]
+
+ json_file_path = os.path.join(folder, "theme_list.json")
+
+ try:
+ with open(json_file_path, "r", encoding="utf8") as json_file:
+ themes_from_url = [item["id"] for item in json.load(json_file)]
+ except FileNotFoundError:
+ themes_from_url = []
+
+ combined_themes = set(themes_from_files + themes_from_url)
+
+ return list(combined_themes)
+
+
+def select_theme(name):
+ selected_file = name + ".py"
+ full_path = os.path.join(folder, selected_file)
+
+ if not os.path.exists(full_path):
+ with open(config_file, "r", encoding="utf8") as json_file:
+ config_data = json.load(json_file)
+
+ config_data["theme"]["file"] = None
+ config_data["theme"]["class"] = name
+
+ with open(config_file, "w", encoding="utf8") as json_file:
+ json.dump(config_data, json_file, indent=2)
+ print(f"Theme {name} successfully selected, restart applio.")
+ gr.Info(f"Theme {name} successfully selected, restart applio.")
+ return
+
+ class_found = get_class(full_path)
+ if class_found:
+ with open(config_file, "r", encoding="utf8") as json_file:
+ config_data = json.load(json_file)
+
+ config_data["theme"]["file"] = selected_file
+ config_data["theme"]["class"] = class_found
+
+ with open(config_file, "w", encoding="utf8") as json_file:
+ json.dump(config_data, json_file, indent=2)
+ print(f"Theme {name} successfully selected, restart applio.")
+ gr.Info(f"Theme {name} successfully selected, restart applio.")
+ else:
+ print(f"Theme {name} was not found.")
+
+
+def read_json():
+    try:
+        with open(config_file, "r", encoding="utf8") as json_file:
+            data = json.load(json_file)
+        # Whether the theme is a local file or a hub id, the class name is what matters here.
+        class_name = data["theme"]["class"]
+        if class_name:
+            return class_name
+        return "ParityError/Interstellar"
+    except Exception as error:
+        print(f"An error occurred loading the theme: {error}")
+        return "ParityError/Interstellar"
+
+
+def load_json():
+ try:
+ with open(config_file, "r", encoding="utf8") as json_file:
+ data = json.load(json_file)
+ selected_file = data["theme"]["file"]
+ class_name = data["theme"]["class"]
+
+ if selected_file is not None and class_name:
+ module = importlib.import_module(selected_file[:-3])
+ obtained_class = getattr(module, class_name)
+ instance = obtained_class()
+ print(f"Theme {class_name} successfully loaded.")
+ return instance
+        elif selected_file is None and class_name:
+ return class_name
+ else:
+ print("The theme is incorrect.")
+ return None
+ except Exception as error:
+ print(f"An error occurred loading the theme: {error}")
+ return None
diff --git a/assets/themes/theme_list.json b/assets/themes/theme_list.json
new file mode 100644
index 0000000000000000000000000000000000000000..8efc6f9b49e558892175e8eac9d1131a312cd651
--- /dev/null
+++ b/assets/themes/theme_list.json
@@ -0,0 +1,81 @@
+[
+ {"id": "freddyaboulton/dracula_revamped"},
+ {"id": "freddyaboulton/bad-theme-space"},
+ {"id": "gradio/dracula_revamped"},
+ {"id": "abidlabs/dracula_revamped"},
+ {"id": "gradio/dracula_test"},
+ {"id": "abidlabs/dracula_test"},
+ {"id": "gradio/seafoam"},
+ {"id": "gradio/glass"},
+ {"id": "gradio/monochrome"},
+ {"id": "gradio/soft"},
+ {"id": "gradio/default"},
+ {"id": "gradio/base"},
+ {"id": "abidlabs/pakistan"},
+ {"id": "dawood/microsoft_windows"},
+ {"id": "ysharma/steampunk"},
+ {"id": "ysharma/huggingface"},
+ {"id": "gstaff/xkcd"},
+ {"id": "JohnSmith9982/small_and_pretty"},
+ {"id": "abidlabs/Lime"},
+ {"id": "freddyaboulton/this-theme-does-not-exist-2"},
+ {"id": "aliabid94/new-theme"},
+ {"id": "aliabid94/test2"},
+ {"id": "aliabid94/test3"},
+ {"id": "aliabid94/test4"},
+ {"id": "abidlabs/banana"},
+ {"id": "freddyaboulton/test-blue"},
+ {"id": "gstaff/sketch"},
+ {"id": "gstaff/whiteboard"},
+ {"id": "ysharma/llamas"},
+ {"id": "abidlabs/font-test"},
+ {"id": "YenLai/Superhuman"},
+ {"id": "bethecloud/storj_theme"},
+ {"id": "sudeepshouche/minimalist"},
+ {"id": "knotdgaf/gradiotest"},
+ {"id": "ParityError/Interstellar"},
+ {"id": "ParityError/Anime"},
+ {"id": "Ajaxon6255/Emerald_Isle"},
+ {"id": "ParityError/LimeFace"},
+ {"id": "finlaymacklon/smooth_slate"},
+ {"id": "finlaymacklon/boxy_violet"},
+ {"id": "derekzen/stardust"},
+ {"id": "EveryPizza/Cartoony-Gradio-Theme"},
+ {"id": "Ifeanyi/Cyanister"},
+ {"id": "Tshackelton/IBMPlex-DenseReadable"},
+ {"id": "snehilsanyal/scikit-learn"},
+ {"id": "Himhimhim/xkcd"},
+ {"id": "shivi/calm_seafoam"},
+ {"id": "nota-ai/theme"},
+ {"id": "rawrsor1/Everforest"},
+ {"id": "SebastianBravo/simci_css"},
+ {"id": "rottenlittlecreature/Moon_Goblin"},
+ {"id": "abidlabs/test-yellow"},
+ {"id": "abidlabs/test-yellow3"},
+ {"id": "idspicQstitho/dracula_revamped"},
+ {"id": "kfahn/AnimalPose"},
+ {"id": "HaleyCH/HaleyCH_Theme"},
+ {"id": "simulKitke/dracula_test"},
+ {"id": "braintacles/CrimsonNight"},
+ {"id": "wentaohe/whiteboardv2"},
+ {"id": "reilnuud/polite"},
+ {"id": "remilia/Ghostly"},
+ {"id": "Franklisi/darkmode"},
+ {"id": "coding-alt/soft"},
+ {"id": "xiaobaiyuan/theme_land"},
+ {"id": "step-3-profit/Midnight-Deep"},
+ {"id": "xiaobaiyuan/theme_demo"},
+ {"id": "Taithrah/Minimal"},
+ {"id": "Insuz/SimpleIndigo"},
+ {"id": "zkunn/Alipay_Gradio_theme"},
+ {"id": "Insuz/Mocha"},
+ {"id": "xiaobaiyuan/theme_brief"},
+ {"id": "Ama434/434-base-Barlow"},
+ {"id": "Ama434/def_barlow"},
+ {"id": "Ama434/neutral-barlow"},
+ {"id": "dawood/dracula_test"},
+ {"id": "nuttea/Softblue"},
+ {"id": "BlueDancer/Alien_Diffusion"},
+ {"id": "naughtondale/monochrome"},
+ {"id": "Dagfinn1962/standard"}
+]
\ No newline at end of file
diff --git a/assets/version_checker.py b/assets/version_checker.py
new file mode 100644
index 0000000000000000000000000000000000000000..1a364f9604d52d5aa464e8629b2cfda7a28222ca
--- /dev/null
+++ b/assets/version_checker.py
@@ -0,0 +1,57 @@
+import os
+import sys
+import json
+import requests
+
+now_dir = os.getcwd()
+sys.path.append(now_dir)
+
+config_file = os.path.join(now_dir, "assets", "config.json")
+
+
+def load_local_version():
+ try:
+ with open(config_file, "r", encoding="utf8") as file:
+ config = json.load(file)
+ return config["version"]
+ except (FileNotFoundError, json.JSONDecodeError) as error:
+ print(f"Error loading local version: {error}")
+ return None
+
+
+def obtain_tag_name():
+ url = "https://api.github.com/repos/IAHispano/Applio/releases/latest"
+ session = requests.Session()
+
+ try:
+ response = session.get(url)
+ response.raise_for_status()
+
+ data = response.json()
+ return data.get("tag_name")
+
+ except requests.exceptions.RequestException as error:
+ print(f"Error obtaining online version: {error}")
+ return None
+
+
+def compare_version():
+ local_version = load_local_version()
+ if not local_version:
+ return "Local version could not be determined."
+
+ online_version = obtain_tag_name()
+ if not online_version:
+ return "Online version could not be determined. Make sure you have an internet connection."
+
+    elements_online_version = tuple(map(int, online_version.split(".")))
+    elements_local_version = tuple(map(int, local_version.split(".")))
+
+    # Tuple comparison orders versions correctly, even when one version has
+    # more components than the other (e.g. 3.2 < 3.2.1).
+    if elements_local_version < elements_online_version:
+        return f"Your local version {local_version} is older than the latest version {online_version}."
+
+    return f"Your local version {local_version} is the latest version."
diff --git a/assets/zluda/README.md b/assets/zluda/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..7c9b398372c30bc3307ed8763cd5622d53e34854
--- /dev/null
+++ b/assets/zluda/README.md
@@ -0,0 +1,63 @@
+## Installation and Setup Instructions
+
+### 1. Install VC++ Runtime
+Download and install the VC++ Runtime from [this link](https://aka.ms/vs/17/release/vc_redist.x64.exe).
+
+### 2. Install HIP SDK
+
+- **Read the [System Requirements](https://rocm.docs.amd.com/projects/install-on-windows/en/develop/reference/system-requirements.html)**
+
+ Check the **"Windows-supported GPUs"** section to determine the correct installation steps:
+
+ - **If your GPU has a green checkbox in the HIP SDK column:**
+ - **Install either v6.1.2 or v5.7.1 HIP SDK**
+ - Download from [AMD ROCm Hub](https://www.amd.com/en/developer/resources/rocm-hub/hip-sdk.html)
+
+ - **If your GPU has a red cross in the HIP SDK column:**
+ - **Install v5.7.1 HIP SDK**
+ - For 6700, 6700XT, 6750XT, download [this archive](https://github.com/brknsoul/ROCmLibs/raw/main/Optimised_ROCmLibs_gfx1031.7z)
+ - For 6600, 6600XT, 6650XT, download [this archive](https://github.com/brknsoul/ROCmLibs/raw/main/Optimised_ROCmLibs_gfx1032.7z)
+
+ **Steps:**
+ 1. Rename `C:\Program Files\AMD\ROCm\5.7\bin\rocblas\library` to `library.old`
+ 2. Create a new folder named `library`
+ 3. Unzip the content of the archive into that folder
+
+ - **If your GPU is not listed:**
+ - **Install v5.7.1 HIP SDK**
+ 1. Google "techpowerup your_gpu" to find the value of "Shader ISA" (gfxnnnn). Only `gfx803/900/906/1010/1011/1012/1030/1100/1101/1102` are supported.
+ 2. Download [this archive](https://github.com/brknsoul/ROCmLibs/raw/main/ROCmLibs.7z)
+
+ **Steps:**
+ 1. Rename `C:\Program Files\AMD\ROCm\5.7\bin\rocblas\library` to `library.old`
+ 2. Unzip the content of the archive into `C:\Program Files\AMD\ROCm\5.7\bin\rocblas\`
+
+### 3. Patching Applio
+
+1. **Move all `.bat` files from the `assets/zluda` folder to the root directory of Applio.**
+
+2. **For Precompiled Applio:**
+ - Run `reinstall-torch.bat` to patch Applio.
+
+3. **For Applio Source Code:**
+ 1. Open `run-install.bat` and update the Torch versions on line 67:
+ ```sh
+ pip install torch==2.1.1 torchvision==0.16.1 torchaudio==2.1.1 --index-url https://download.pytorch.org/whl/cu118
+ ```
+ 2. Execute `run-install.bat` to install the required libraries.
+ 3. Manually apply the code changes from the pull request.
+
+### 4. Download ZLUDA and Patch Torch Libraries
+
+1. **For HIP SDK 5.7:**
+   - Run `patch-zluda-hip57.bat`.
+   - Add `C:\Program Files\AMD\ROCm\5.7\bin` to your system's Path environment variable.
+
+2. **For HIP SDK 6.1:**
+   - Run `patch-zluda-hip61.bat`.
+   - Add `C:\Program Files\AMD\ROCm\6.1\bin` to your system's Path environment variable.
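+
+To confirm the Path change took effect, open a **new** terminal and inspect the variable (a quick sanity check):
+
+```sh
+echo %PATH%
+```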
+
+### 5. Starting Applio
+
+- Execute `run-applio-amd.bat` to start Applio.
+
diff --git a/assets/zluda/patch-zluda-hip57.bat b/assets/zluda/patch-zluda-hip57.bat
new file mode 100644
index 0000000000000000000000000000000000000000..1973a59a30b49da680b082b202281c691784e16f
--- /dev/null
+++ b/assets/zluda/patch-zluda-hip57.bat
@@ -0,0 +1,7 @@
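+rem Replace torch's bundled CUDA runtime DLLs with ZLUDA shims so the CUDA build
+rem of torch can run on AMD GPUs through HIP.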
+rmdir /S /q zluda
+curl -s -L https://github.com/lshqqytiger/ZLUDA/releases/download/rel.11cc5844514f93161e0e74387f04e2c537705a82/ZLUDA-windows-amd64.zip > zluda.zip
+tar -xf zluda.zip
+del zluda.zip
+copy zluda\cublas.dll env\Lib\site-packages\torch\lib\cublas64_11.dll /y
+copy zluda\cusparse.dll env\Lib\site-packages\torch\lib\cusparse64_11.dll /y
+copy zluda\nvrtc.dll env\Lib\site-packages\torch\lib\nvrtc64_112_0.dll /y
diff --git a/assets/zluda/patch-zluda-hip61.bat b/assets/zluda/patch-zluda-hip61.bat
new file mode 100644
index 0000000000000000000000000000000000000000..1378f9a6bcca9b03c9020e2011822e07b09e54fc
--- /dev/null
+++ b/assets/zluda/patch-zluda-hip61.bat
@@ -0,0 +1,7 @@
+rmdir /S /q zluda
+curl -s -L https://github.com/lshqqytiger/ZLUDA/releases/download/rel.86cdab3b14b556e95eafe370b8e8a1a80e8d093b/ZLUDA-windows-amd64.zip > zluda.zip
+tar -xf zluda.zip
+del zluda.zip
+copy zluda\cublas.dll env\Lib\site-packages\torch\lib\cublas64_11.dll /y
+copy zluda\cusparse.dll env\Lib\site-packages\torch\lib\cusparse64_11.dll /y
+copy zluda\nvrtc.dll env\Lib\site-packages\torch\lib\nvrtc64_112_0.dll /y
diff --git a/assets/zluda/reinstall-torch.bat b/assets/zluda/reinstall-torch.bat
new file mode 100644
index 0000000000000000000000000000000000000000..5dab777944fc1fabac91e148a6368a3cca5b26cc
--- /dev/null
+++ b/assets/zluda/reinstall-torch.bat
@@ -0,0 +1,11 @@
+set "principal=%cd%"
+set "URL_EXTRA=https://huggingface.co/IAHispano/applio/resolve/main"
+set "CONDA_ROOT_PREFIX=%UserProfile%\Miniconda3"
+set "INSTALL_ENV_DIR=%principal%\env"
+set "MINICONDA_DOWNLOAD_URL=https://repo.anaconda.com/miniconda/Miniconda3-py39_23.9.0-0-Windows-x86_64.exe"
+set "CONDA_EXECUTABLE=%CONDA_ROOT_PREFIX%\Scripts\conda.exe"
+
+call "%CONDA_ROOT_PREFIX%\condabin\conda.bat" activate "%INSTALL_ENV_DIR%"
+pip uninstall torch torchvision torchaudio -y
+pip install torch==2.1.1 torchvision==0.16.1 torchaudio==2.1.1 --index-url https://download.pytorch.org/whl/cu118
+call "%CONDA_ROOT_PREFIX%\condabin\conda.bat" deactivate
diff --git a/assets/zluda/run-applio-amd.bat b/assets/zluda/run-applio-amd.bat
new file mode 100644
index 0000000000000000000000000000000000000000..0fe41e859526a7ef21422b19a3fa797d2738bd67
--- /dev/null
+++ b/assets/zluda/run-applio-amd.bat
@@ -0,0 +1,14 @@
+@echo off
+setlocal
+title Applio
+
+if not exist env (
+ echo Please run 'run-install.bat' first to set up the environment.
+ pause
+ exit /b 1
+)
+
+set HIP_VISIBLE_DEVICES=0
+zluda\zluda.exe -- env\python.exe app.py --open
+echo.
+pause
\ No newline at end of file
diff --git a/core.py b/core.py
new file mode 100644
index 0000000000000000000000000000000000000000..ee87ec787442fd5fe2ae024a7329eed43bcd0aa3
--- /dev/null
+++ b/core.py
@@ -0,0 +1,1751 @@
+import spaces
+import os
+import sys
+import json
+import argparse
+import subprocess
+from functools import lru_cache
+from distutils.util import strtobool
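+# Note: distutils is removed in Python 3.12+; strtobool converts strings like
+# "true"/"false" to 1/0 for the boolean CLI arguments below.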
+
+now_dir = os.getcwd()
+sys.path.append(now_dir)
+
+current_script_directory = os.path.dirname(os.path.realpath(__file__))
+logs_path = os.path.join(current_script_directory, "logs")
+
+from rvc.lib.tools.prerequisites_download import prequisites_download_pipeline
+from rvc.train.process.model_blender import model_blender
+from rvc.train.process.model_information import model_information
+from rvc.train.process.extract_small_model import extract_small_model
+from rvc.lib.tools.analyzer import analyze_audio
+from rvc.lib.tools.launch_tensorboard import launch_tensorboard_pipeline
+from rvc.lib.tools.model_download import model_download_pipeline
+
+python = sys.executable
+
+
+# Get TTS Voices -> https://speech.platform.bing.com/consumer/speech/synthesize/readaloud/voices/list?trustedclienttoken=6A5AA1D4EAFF4E9FB37E23D68491D6F4
+@lru_cache(maxsize=1) # Cache only one result since the file is static
+def load_voices_data():
+ with open(os.path.join("rvc", "lib", "tools", "tts_voices.json")) as f:
+ return json.load(f)
+
+
+voices_data = load_voices_data()
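+# Deduplicate locales with a set comprehension; the resulting list feeds the
+# --tts_voice argparse choices below.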
+locales = list({voice["Locale"] for voice in voices_data})
+
+
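+# Lazy import: VoiceConverter (and the heavy inference stack behind it) loads on
+# first use only, and lru_cache then returns the same cached instance.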
+@lru_cache(maxsize=None)
+def import_voice_converter():
+ from rvc.infer.infer import VoiceConverter
+
+ return VoiceConverter()
+
+
+@lru_cache(maxsize=1)
+def get_config():
+ from rvc.configs.config import Config
+
+ return Config()
+
+
+# Infer
+def run_infer_script(
+ pitch: int,
+ filter_radius: int,
+ index_rate: float,
+ volume_envelope: int,
+ protect: float,
+ hop_length: int,
+ f0_method: str,
+ input_path: str,
+ output_path: str,
+ pth_path: str,
+ index_path: str,
+ split_audio: bool,
+ f0_autotune: bool,
+ clean_audio: bool,
+ clean_strength: float,
+ export_format: str,
+ upscale_audio: bool,
+ f0_file: str,
+ embedder_model: str,
+ embedder_model_custom: str = None,
+ formant_shifting: bool = False,
+ formant_qfrency: float = 1.0,
+ formant_timbre: float = 1.0,
+ post_process: bool = False,
+ reverb: bool = False,
+ pitch_shift: bool = False,
+ limiter: bool = False,
+ gain: bool = False,
+ distortion: bool = False,
+ chorus: bool = False,
+ bitcrush: bool = False,
+ clipping: bool = False,
+ compressor: bool = False,
+ delay: bool = False,
+ *sliders: list,
+):
+ if not sliders:
+ sliders = [0] * 25
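+    # 25 zeros: a neutral default for each post-processing parameter mapped below.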
+ infer_pipeline = import_voice_converter()
+ additional_params = {
+ "reverb_room_size": sliders[0],
+ "reverb_damping": sliders[1],
+ "reverb_wet_level": sliders[2],
+ "reverb_dry_level": sliders[3],
+ "reverb_width": sliders[4],
+ "reverb_freeze_mode": sliders[5],
+ "pitch_shift_semitones": sliders[6],
+ "limiter_threshold": sliders[7],
+ "limiter_release": sliders[8],
+ "gain_db": sliders[9],
+ "distortion_gain": sliders[10],
+ "chorus_rate": sliders[11],
+ "chorus_depth": sliders[12],
+ "chorus_delay": sliders[13],
+ "chorus_feedback": sliders[14],
+ "chorus_mix": sliders[15],
+ "bitcrush_bit_depth": sliders[16],
+ "clipping_threshold": sliders[17],
+ "compressor_threshold": sliders[18],
+ "compressor_ratio": sliders[19],
+ "compressor_attack": sliders[20],
+ "compressor_release": sliders[21],
+ "delay_seconds": sliders[22],
+ "delay_feedback": sliders[23],
+ "delay_mix": sliders[24],
+ }
+ infer_pipeline.convert_audio(
+ pitch=pitch,
+ filter_radius=filter_radius,
+ index_rate=index_rate,
+ volume_envelope=volume_envelope,
+ protect=protect,
+ hop_length=hop_length,
+ f0_method=f0_method,
+ audio_input_path=input_path,
+ audio_output_path=output_path,
+ model_path=pth_path,
+ index_path=index_path,
+ split_audio=split_audio,
+ f0_autotune=f0_autotune,
+ clean_audio=clean_audio,
+ clean_strength=clean_strength,
+ export_format=export_format,
+ upscale_audio=upscale_audio,
+ f0_file=f0_file,
+ embedder_model=embedder_model,
+ embedder_model_custom=embedder_model_custom,
+ formant_shifting=formant_shifting,
+ formant_qfrency=formant_qfrency,
+ formant_timbre=formant_timbre,
+ post_process=post_process,
+ reverb=reverb,
+ pitch_shift=pitch_shift,
+ limiter=limiter,
+ gain=gain,
+ distortion=distortion,
+ chorus=chorus,
+ bitcrush=bitcrush,
+ clipping=clipping,
+ compressor=compressor,
+ delay=delay,
+ sliders=additional_params,
+ )
+ return f"File {input_path} inferred successfully.", output_path.replace(
+ ".wav", f".{export_format.lower()}"
+ )
+
+
+# Batch infer
+def run_batch_infer_script(
+ pitch: int,
+ filter_radius: int,
+ index_rate: float,
+ volume_envelope: int,
+ protect: float,
+ hop_length: int,
+ f0_method: str,
+ input_folder: str,
+ output_folder: str,
+ pth_path: str,
+ index_path: str,
+ split_audio: bool,
+ f0_autotune: bool,
+ clean_audio: bool,
+ clean_strength: float,
+ export_format: str,
+ upscale_audio: bool,
+ f0_file: str,
+ embedder_model: str,
+ embedder_model_custom: str = None,
+ formant_shifting: bool = False,
+ formant_qfrency: float = 1.0,
+ formant_timbre: float = 1.0,
+ post_process: bool = False,
+ reverb: bool = False,
+ pitch_shift: bool = False,
+ limiter: bool = False,
+ gain: bool = False,
+ distortion: bool = False,
+ chorus: bool = False,
+ bitcrush: bool = False,
+ clipping: bool = False,
+ compressor: bool = False,
+ delay: bool = False,
+ *sliders: list,
+):
+ audio_files = [
+ f for f in os.listdir(input_folder) if f.endswith((".mp3", ".wav", ".flac"))
+ ]
+ print(f"Detected {len(audio_files)} audio files for inference.")
+ if not sliders:
+ sliders = [0] * 25
+ infer_pipeline = import_voice_converter()
+ additional_params = {
+ "reverb_room_size": sliders[0],
+ "reverb_damping": sliders[1],
+ "reverb_wet_level": sliders[2],
+ "reverb_dry_level": sliders[3],
+ "reverb_width": sliders[4],
+ "reverb_freeze_mode": sliders[5],
+ "pitch_shift_semitones": sliders[6],
+ "limiter_threshold": sliders[7],
+ "limiter_release": sliders[8],
+ "gain_db": sliders[9],
+ "distortion_gain": sliders[10],
+ "chorus_rate": sliders[11],
+ "chorus_depth": sliders[12],
+ "chorus_delay": sliders[13],
+ "chorus_feedback": sliders[14],
+ "chorus_mix": sliders[15],
+ "bitcrush_bit_depth": sliders[16],
+ "clipping_threshold": sliders[17],
+ "compressor_threshold": sliders[18],
+ "compressor_ratio": sliders[19],
+ "compressor_attack": sliders[20],
+ "compressor_release": sliders[21],
+ "delay_seconds": sliders[22],
+ "delay_feedback": sliders[23],
+ "delay_mix": sliders[24],
+ }
+ infer_pipeline.convert_audio_batch(
+ pitch=pitch,
+ filter_radius=filter_radius,
+ index_rate=index_rate,
+ volume_envelope=volume_envelope,
+ protect=protect,
+ hop_length=hop_length,
+ f0_method=f0_method,
+ audio_input_paths=input_folder,
+ audio_output_path=output_folder,
+ model_path=pth_path,
+ index_path=index_path,
+ split_audio=split_audio,
+ f0_autotune=f0_autotune,
+ clean_audio=clean_audio,
+ clean_strength=clean_strength,
+ export_format=export_format,
+ upscale_audio=upscale_audio,
+ f0_file=f0_file,
+ embedder_model=embedder_model,
+ embedder_model_custom=embedder_model_custom,
+ formant_shifting=formant_shifting,
+ formant_qfrency=formant_qfrency,
+ formant_timbre=formant_timbre,
+ pid_file_path=os.path.join(now_dir, "assets", "infer_pid.txt"),
+ post_process=post_process,
+ reverb=reverb,
+ pitch_shift=pitch_shift,
+ limiter=limiter,
+ gain=gain,
+ distortion=distortion,
+ chorus=chorus,
+ bitcrush=bitcrush,
+ clipping=clipping,
+ compressor=compressor,
+ delay=delay,
+ sliders=additional_params,
+ )
+
+ return f"Files from {input_folder} inferred successfully."
+
+
+# TTS
+def run_tts_script(
+ tts_text: str,
+ tts_voice: str,
+ tts_rate: int,
+ pitch: int,
+ filter_radius: int,
+ index_rate: float,
+ volume_envelope: int,
+ protect: float,
+ hop_length: int,
+ f0_method: str,
+ output_tts_path: str,
+ output_rvc_path: str,
+ pth_path: str,
+ index_path: str,
+ split_audio: bool,
+ f0_autotune: bool,
+ clean_audio: bool,
+ clean_strength: float,
+ export_format: str,
+ upscale_audio: bool,
+ f0_file: str,
+ embedder_model: str,
+ embedder_model_custom: str = None,
+):
+
+ tts_script_path = os.path.join("rvc", "lib", "tools", "tts.py")
+
+ if os.path.exists(output_tts_path):
+ os.remove(output_tts_path)
+
+ command_tts = [
+ *map(
+ str,
+ [
+ python,
+ tts_script_path,
+ tts_text,
+ tts_voice,
+ tts_rate,
+ output_tts_path,
+ ],
+ ),
+ ]
+ subprocess.run(command_tts)
+ infer_pipeline = import_voice_converter()
+ infer_pipeline.convert_audio(
+ pitch=pitch,
+ filter_radius=filter_radius,
+ index_rate=index_rate,
+ volume_envelope=volume_envelope,
+ protect=protect,
+ hop_length=hop_length,
+ f0_method=f0_method,
+ audio_input_path=output_tts_path,
+ audio_output_path=output_rvc_path,
+ model_path=pth_path,
+ index_path=index_path,
+ split_audio=split_audio,
+ f0_autotune=f0_autotune,
+ clean_audio=clean_audio,
+ clean_strength=clean_strength,
+ export_format=export_format,
+ upscale_audio=upscale_audio,
+ f0_file=f0_file,
+ embedder_model=embedder_model,
+ embedder_model_custom=embedder_model_custom,
+ formant_shifting=None,
+ formant_qfrency=None,
+ formant_timbre=None,
+ post_process=None,
+ reverb=None,
+ pitch_shift=None,
+ limiter=None,
+ gain=None,
+ distortion=None,
+ chorus=None,
+ bitcrush=None,
+ clipping=None,
+ compressor=None,
+ delay=None,
+ sliders=None,
+ )
+
+ return f"Text {tts_text} synthesized successfully.", output_rvc_path.replace(
+ ".wav", f".{export_format.lower()}"
+ )
+
+
+# Preprocess
+def run_preprocess_script(
+ model_name: str,
+ dataset_path: str,
+ sample_rate: int,
+ cpu_cores: int,
+ cut_preprocess: bool,
+ process_effects: bool,
+):
+ config = get_config()
+ per = 3.0 if config.is_half else 3.7
+ preprocess_script_path = os.path.join("rvc", "train", "preprocess", "preprocess.py")
+ command = [
+ python,
+ preprocess_script_path,
+ *map(
+ str,
+ [
+ os.path.join(logs_path, model_name),
+ dataset_path,
+ sample_rate,
+ per,
+ cpu_cores,
+ cut_preprocess,
+ process_effects,
+ ],
+ ),
+ ]
+ subprocess.run(command)
+ return f"Model {model_name} preprocessed successfully."
+
+
+# Extract
+def run_extract_script(
+ model_name: str,
+ rvc_version: str,
+ f0_method: str,
+ pitch_guidance: bool,
+ hop_length: int,
+ cpu_cores: int,
+ gpu: int,
+ sample_rate: int,
+ embedder_model: str,
+ embedder_model_custom: str = None,
+):
+
+ model_path = os.path.join(logs_path, model_name)
+ extract = os.path.join("rvc", "train", "extract", "extract.py")
+
+ command_1 = [
+ python,
+ extract,
+ *map(
+ str,
+ [
+ model_path,
+ f0_method,
+ hop_length,
+ cpu_cores,
+ gpu,
+ rvc_version,
+ pitch_guidance,
+ sample_rate,
+ embedder_model,
+ embedder_model_custom,
+ ],
+ ),
+ ]
+
+ subprocess.run(command_1)
+
+ return f"Model {model_name} extracted successfully."
+
+
+# Train
+@spaces.GPU
+def run_train_script(
+ model_name: str,
+ rvc_version: str,
+ save_every_epoch: int,
+ save_only_latest: bool,
+ save_every_weights: bool,
+ total_epoch: int,
+ sample_rate: int,
+ batch_size: int,
+ gpu: int,
+ pitch_guidance: bool,
+ overtraining_detector: bool,
+ overtraining_threshold: int,
+ pretrained: bool,
+ sync_graph: bool,
+ index_algorithm: str = "Auto",
+ cache_data_in_gpu: bool = False,
+ custom_pretrained: bool = False,
+ g_pretrained_path: str = None,
+ d_pretrained_path: str = None,
+):
+
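+    # pg / pd: paths to the pretrained generator and discriminator checkpoints.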
+    if pretrained:
+ from rvc.lib.tools.pretrained_selector import pretrained_selector
+
+        if not custom_pretrained:
+ pg, pd = pretrained_selector(bool(pitch_guidance))[str(rvc_version)][
+ int(sample_rate)
+ ]
+ else:
+ if g_pretrained_path is None or d_pretrained_path is None:
+ raise ValueError(
+ "Please provide the path to the pretrained G and D models."
+ )
+ pg, pd = g_pretrained_path, d_pretrained_path
+ else:
+ pg, pd = "", ""
+
+ train_script_path = os.path.join("rvc", "train", "train.py")
+ command = [
+ python,
+ train_script_path,
+ *map(
+ str,
+ [
+ model_name,
+ save_every_epoch,
+ total_epoch,
+ pg,
+ pd,
+ rvc_version,
+ gpu,
+ batch_size,
+ sample_rate,
+ pitch_guidance,
+ save_only_latest,
+ save_every_weights,
+ cache_data_in_gpu,
+ overtraining_detector,
+ overtraining_threshold,
+ sync_graph,
+ ],
+ ),
+ ]
+ subprocess.run(command)
+ run_index_script(model_name, rvc_version, index_algorithm)
+ return f"Model {model_name} trained successfully."
+
+
+# Index
+def run_index_script(model_name: str, rvc_version: str, index_algorithm: str):
+ index_script_path = os.path.join("rvc", "train", "process", "extract_index.py")
+ command = [
+ python,
+ index_script_path,
+ os.path.join(logs_path, model_name),
+ rvc_version,
+ index_algorithm,
+ ]
+
+ subprocess.run(command)
+ return f"Index file for {model_name} generated successfully."
+
+
+# Model extract
+def run_model_extract_script(
+ pth_path: str,
+ model_name: str,
+ sample_rate: int,
+ pitch_guidance: bool,
+ rvc_version: str,
+ epoch: int,
+ step: int,
+):
+ extract_small_model(
+ pth_path, model_name, sample_rate, pitch_guidance, rvc_version, epoch, step
+ )
+ return f"Model {model_name} extracted successfully."
+
+
+# Model information
+def run_model_information_script(pth_path: str):
+    information = model_information(pth_path)
+    print(information)
+    return information
+
+
+# Model blender
+def run_model_blender_script(
+ model_name: str, pth_path_1: str, pth_path_2: str, ratio: float
+):
+ message, model_blended = model_blender(model_name, pth_path_1, pth_path_2, ratio)
+ return message, model_blended
+
+
+# Tensorboard
+def run_tensorboard_script():
+ launch_tensorboard_pipeline()
+
+
+# Download
+def run_download_script(model_link: str):
+ model_download_pipeline(model_link)
+ return f"Model downloaded successfully."
+
+
+# Prerequisites
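+# The flags follow the parameter order below: v1 pretraineds, v2 pretraineds,
+# base models, and executables.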
+def run_prerequisites_script(
+ pretraineds_v1: bool, pretraineds_v2: bool, models: bool, exe: bool
+):
+ prequisites_download_pipeline(pretraineds_v1, pretraineds_v2, models, exe)
+ return "Prerequisites installed successfully."
+
+
+# Audio analyzer
+def run_audio_analyzer_script(
+ input_path: str, save_plot_path: str = "logs/audio_analysis.png"
+):
+ audio_info, plot_path = analyze_audio(input_path, save_plot_path)
+ print(
+ f"Audio info of {input_path}: {audio_info}",
+ f"Audio file {input_path} analyzed successfully. Plot saved at: {plot_path}",
+ )
+ return audio_info, plot_path
+
+
+def run_model_author_script(model_author: str):
+ with open(os.path.join(now_dir, "assets", "config.json"), "r") as f:
+ config = json.load(f)
+
+ config["model_author"] = model_author
+
+ with open(os.path.join(now_dir, "assets", "config.json"), "w") as f:
+ json.dump(config, f, indent=4)
+
+ print(f"Model author set to {model_author}.")
+ return f"Model author set to {model_author}."
+
+
+# API
+def run_api_script(ip: str, port: int):
+ command = [
+ "env/Scripts/uvicorn.exe" if os.name == "nt" else "uvicorn",
+ "api:app",
+ "--host",
+ ip,
+ "--port",
+        str(port),
+ ]
+ subprocess.run(command)
+
+
+# Parse arguments
+def parse_arguments():
+ parser = argparse.ArgumentParser(
+ description="Run the main.py script with specific parameters."
+ )
+ subparsers = parser.add_subparsers(
+ title="subcommands", dest="mode", help="Choose a mode"
+ )
+
+ # Parser for 'infer' mode
+ infer_parser = subparsers.add_parser("infer", help="Run inference")
+ pitch_description = (
+ "Set the pitch of the audio. Higher values result in a higher pitch."
+ )
+ infer_parser.add_argument(
+ "--pitch",
+ type=int,
+ help=pitch_description,
+ choices=range(-24, 25),
+ default=0,
+ )
+ filter_radius_description = "Apply median filtering to the extracted pitch values if this value is greater than or equal to three. This can help reduce breathiness in the output audio."
+ infer_parser.add_argument(
+ "--filter_radius",
+ type=int,
+ help=filter_radius_description,
+ choices=range(11),
+ default=3,
+ )
+ index_rate_description = "Control the influence of the index file on the output. Higher values mean stronger influence. Lower values can help reduce artifacts but may result in less accurate voice cloning."
+ infer_parser.add_argument(
+ "--index_rate",
+ type=float,
+ help=index_rate_description,
+ choices=[(i / 10) for i in range(11)],
+ default=0.3,
+ )
+ volume_envelope_description = "Control the blending of the output's volume envelope. A value of 1 means the output envelope is fully used."
+ infer_parser.add_argument(
+ "--volume_envelope",
+ type=float,
+ help=volume_envelope_description,
+ choices=[(i / 10) for i in range(11)],
+ default=1,
+ )
+ protect_description = "Protect consonants and breathing sounds from artifacts. A value of 0.5 offers the strongest protection, while lower values may reduce the protection level but potentially mitigate the indexing effect."
+ infer_parser.add_argument(
+ "--protect",
+ type=float,
+ help=protect_description,
+ choices=[(i / 10) for i in range(6)],
+ default=0.33,
+ )
+ hop_length_description = "Only applicable for the Crepe pitch extraction method. Determines the time it takes for the system to react to a significant pitch change. Smaller values require more processing time but can lead to better pitch accuracy."
+ infer_parser.add_argument(
+ "--hop_length",
+ type=int,
+ help=hop_length_description,
+ choices=range(1, 513),
+ default=128,
+ )
+ f0_method_description = "Choose the pitch extraction algorithm for the conversion. 'rmvpe' is the default and generally recommended."
+ infer_parser.add_argument(
+ "--f0_method",
+ type=str,
+ help=f0_method_description,
+ choices=[
+ "crepe",
+ "crepe-tiny",
+ "rmvpe",
+ "fcpe",
+ "hybrid[crepe+rmvpe]",
+ "hybrid[crepe+fcpe]",
+ "hybrid[rmvpe+fcpe]",
+ "hybrid[crepe+rmvpe+fcpe]",
+ ],
+ default="rmvpe",
+ )
+ infer_parser.add_argument(
+ "--input_path",
+ type=str,
+ help="Full path to the input audio file.",
+ required=True,
+ )
+ infer_parser.add_argument(
+ "--output_path",
+ type=str,
+ help="Full path to the output audio file.",
+ required=True,
+ )
+ pth_path_description = "Full path to the RVC model file (.pth)."
+ infer_parser.add_argument(
+ "--pth_path", type=str, help=pth_path_description, required=True
+ )
+ index_path_description = "Full path to the index file (.index)."
+ infer_parser.add_argument(
+ "--index_path", type=str, help=index_path_description, required=True
+ )
+ split_audio_description = "Split the audio into smaller segments before inference. This can improve the quality of the output for longer audio files."
+ infer_parser.add_argument(
+ "--split_audio",
+ type=lambda x: bool(strtobool(x)),
+ choices=[True, False],
+ help=split_audio_description,
+ default=False,
+ )
+ f0_autotune_description = "Apply a light autotune to the inferred audio. Particularly useful for singing voice conversions."
+ infer_parser.add_argument(
+ "--f0_autotune",
+ type=lambda x: bool(strtobool(x)),
+ choices=[True, False],
+ help=f0_autotune_description,
+ default=False,
+ )
+ clean_audio_description = "Clean the output audio using noise reduction algorithms. Recommended for speech conversions."
+ infer_parser.add_argument(
+ "--clean_audio",
+ type=lambda x: bool(strtobool(x)),
+ choices=[True, False],
+ help=clean_audio_description,
+ default=False,
+ )
+ clean_strength_description = "Adjust the intensity of the audio cleaning process. Higher values result in stronger cleaning, but may lead to a more compressed sound."
+ infer_parser.add_argument(
+ "--clean_strength",
+ type=float,
+ help=clean_strength_description,
+ choices=[(i / 10) for i in range(11)],
+ default=0.7,
+ )
+ export_format_description = "Select the desired output audio format."
+ infer_parser.add_argument(
+ "--export_format",
+ type=str,
+ help=export_format_description,
+ choices=["WAV", "MP3", "FLAC", "OGG", "M4A"],
+ default="WAV",
+ )
+ embedder_model_description = (
+ "Choose the model used for generating speaker embeddings."
+ )
+ infer_parser.add_argument(
+ "--embedder_model",
+ type=str,
+ help=embedder_model_description,
+ choices=[
+ "contentvec",
+ "chinese-hubert-base",
+ "japanese-hubert-base",
+ "korean-hubert-base",
+ "custom",
+ ],
+ default="contentvec",
+ )
+ embedder_model_custom_description = "Specify the path to a custom model for speaker embedding. Only applicable if 'embedder_model' is set to 'custom'."
+ infer_parser.add_argument(
+ "--embedder_model_custom",
+ type=str,
+ help=embedder_model_custom_description,
+ default=None,
+ )
+ upscale_audio_description = "Upscale the input audio to a higher quality before processing. This can improve the overall quality of the output, especially for low-quality input audio."
+ infer_parser.add_argument(
+ "--upscale_audio",
+ type=lambda x: bool(strtobool(x)),
+ choices=[True, False],
+ help=upscale_audio_description,
+ default=False,
+ )
+ f0_file_description = "Full path to an external F0 file (.f0). This allows you to use pre-computed pitch values for the input audio."
+ infer_parser.add_argument(
+ "--f0_file",
+ type=str,
+ help=f0_file_description,
+ default=None,
+ )
+ formant_shifting_description = "Apply formant shifting to the input audio. This can help adjust the timbre of the voice."
+ infer_parser.add_argument(
+ "--formant_shifting",
+ type=lambda x: bool(strtobool(x)),
+ choices=[True, False],
+ help=formant_shifting_description,
+ default=False,
+ required=False,
+ )
+ formant_qfrency_description = "Control the frequency of the formant shifting effect. Higher values result in a more pronounced effect."
+ infer_parser.add_argument(
+ "--formant_qfrency",
+ type=float,
+ help=formant_qfrency_description,
+ default=1.0,
+ required=False,
+ )
+ formant_timbre_description = "Control the timbre of the formant shifting effect. Higher values result in a more pronounced effect."
+ infer_parser.add_argument(
+ "--formant_timbre",
+ type=float,
+ help=formant_timbre_description,
+ default=1.0,
+ required=False,
+ )
+
+ # Parser for 'batch_infer' mode
+ batch_infer_parser = subparsers.add_parser(
+ "batch_infer",
+ help="Run batch inference",
+ )
+ batch_infer_parser.add_argument(
+ "--pitch",
+ type=int,
+ help=pitch_description,
+ choices=range(-24, 25),
+ default=0,
+ )
+ batch_infer_parser.add_argument(
+ "--filter_radius",
+ type=int,
+ help=filter_radius_description,
+ choices=range(11),
+ default=3,
+ )
+ batch_infer_parser.add_argument(
+ "--index_rate",
+ type=float,
+ help=index_rate_description,
+ choices=[(i / 10) for i in range(11)],
+ default=0.3,
+ )
+ batch_infer_parser.add_argument(
+ "--volume_envelope",
+ type=float,
+ help=volume_envelope_description,
+ choices=[(i / 10) for i in range(11)],
+ default=1,
+ )
+ batch_infer_parser.add_argument(
+ "--protect",
+ type=float,
+ help=protect_description,
+ choices=[(i / 10) for i in range(6)],
+ default=0.33,
+ )
+ batch_infer_parser.add_argument(
+ "--hop_length",
+ type=int,
+ help=hop_length_description,
+ choices=range(1, 513),
+ default=128,
+ )
+ batch_infer_parser.add_argument(
+ "--f0_method",
+ type=str,
+ help=f0_method_description,
+ choices=[
+ "crepe",
+ "crepe-tiny",
+ "rmvpe",
+ "fcpe",
+ "hybrid[crepe+rmvpe]",
+ "hybrid[crepe+fcpe]",
+ "hybrid[rmvpe+fcpe]",
+ "hybrid[crepe+rmvpe+fcpe]",
+ ],
+ default="rmvpe",
+ )
+ batch_infer_parser.add_argument(
+ "--input_folder",
+ type=str,
+ help="Path to the folder containing input audio files.",
+ required=True,
+ )
+ batch_infer_parser.add_argument(
+ "--output_folder",
+ type=str,
+ help="Path to the folder for saving output audio files.",
+ required=True,
+ )
+ batch_infer_parser.add_argument(
+ "--pth_path", type=str, help=pth_path_description, required=True
+ )
+ batch_infer_parser.add_argument(
+ "--index_path", type=str, help=index_path_description, required=True
+ )
+ batch_infer_parser.add_argument(
+ "--split_audio",
+ type=lambda x: bool(strtobool(x)),
+ choices=[True, False],
+ help=split_audio_description,
+ default=False,
+ )
+ batch_infer_parser.add_argument(
+ "--f0_autotune",
+ type=lambda x: bool(strtobool(x)),
+ choices=[True, False],
+ help=f0_autotune_description,
+ default=False,
+ )
+ batch_infer_parser.add_argument(
+ "--clean_audio",
+ type=lambda x: bool(strtobool(x)),
+ choices=[True, False],
+ help=clean_audio_description,
+ default=False,
+ )
+ batch_infer_parser.add_argument(
+ "--clean_strength",
+ type=float,
+ help=clean_strength_description,
+ choices=[(i / 10) for i in range(11)],
+ default=0.7,
+ )
+ batch_infer_parser.add_argument(
+ "--export_format",
+ type=str,
+ help=export_format_description,
+ choices=["WAV", "MP3", "FLAC", "OGG", "M4A"],
+ default="WAV",
+ )
+ batch_infer_parser.add_argument(
+ "--embedder_model",
+ type=str,
+ help=embedder_model_description,
+ choices=[
+ "contentvec",
+ "chinese-hubert-base",
+ "japanese-hubert-base",
+ "korean-hubert-base",
+ "custom",
+ ],
+ default="contentvec",
+ )
+ batch_infer_parser.add_argument(
+ "--embedder_model_custom",
+ type=str,
+ help=embedder_model_custom_description,
+ default=None,
+ )
+ batch_infer_parser.add_argument(
+ "--upscale_audio",
+ type=lambda x: bool(strtobool(x)),
+ choices=[True, False],
+ help=upscale_audio_description,
+ default=False,
+ )
+ batch_infer_parser.add_argument(
+ "--f0_file",
+ type=str,
+ help=f0_file_description,
+ default=None,
+ )
+ batch_infer_parser.add_argument(
+ "--formant_shifting",
+ type=lambda x: bool(strtobool(x)),
+ choices=[True, False],
+ help=formant_shifting_description,
+ default=False,
+ required=False,
+ )
+ batch_infer_parser.add_argument(
+ "--formant_qfrency",
+ type=float,
+ help=formant_qfrency_description,
+ default=1.0,
+ required=False,
+ )
+ batch_infer_parser.add_argument(
+ "--formant_timbre",
+ type=float,
+ help=formant_timbre_description,
+ default=1.0,
+ required=False,
+ )
+
+ # Parser for 'tts' mode
+ tts_parser = subparsers.add_parser("tts", help="Run TTS inference")
+ tts_parser.add_argument(
+ "--tts_text", type=str, help="Text to be synthesized", required=True
+ )
+ tts_parser.add_argument(
+ "--tts_voice",
+ type=str,
+ help="Voice to be used for TTS synthesis.",
+ choices=locales,
+ required=True,
+ )
+ tts_parser.add_argument(
+ "--tts_rate",
+ type=int,
+ help="Control the speaking rate of the TTS. Values range from -100 (slower) to 100 (faster).",
+ choices=range(-100, 101),
+ default=0,
+ )
+ tts_parser.add_argument(
+ "--pitch",
+ type=int,
+ help=pitch_description,
+ choices=range(-24, 25),
+ default=0,
+ )
+ tts_parser.add_argument(
+ "--filter_radius",
+ type=int,
+ help=filter_radius_description,
+ choices=range(11),
+ default=3,
+ )
+ tts_parser.add_argument(
+ "--index_rate",
+ type=float,
+ help=index_rate_description,
+ choices=[(i / 10) for i in range(11)],
+ default=0.3,
+ )
+ tts_parser.add_argument(
+ "--volume_envelope",
+ type=float,
+ help=volume_envelope_description,
+ choices=[(i / 10) for i in range(11)],
+ default=1,
+ )
+ tts_parser.add_argument(
+ "--protect",
+ type=float,
+ help=protect_description,
+ choices=[(i / 10) for i in range(6)],
+ default=0.33,
+ )
+ tts_parser.add_argument(
+ "--hop_length",
+ type=int,
+ help=hop_length_description,
+ choices=range(1, 513),
+ default=128,
+ )
+ tts_parser.add_argument(
+ "--f0_method",
+ type=str,
+ help=f0_method_description,
+ choices=[
+ "crepe",
+ "crepe-tiny",
+ "rmvpe",
+ "fcpe",
+ "hybrid[crepe+rmvpe]",
+ "hybrid[crepe+fcpe]",
+ "hybrid[rmvpe+fcpe]",
+ "hybrid[crepe+rmvpe+fcpe]",
+ ],
+ default="rmvpe",
+ )
+ tts_parser.add_argument(
+ "--output_tts_path",
+ type=str,
+ help="Full path to save the synthesized TTS audio.",
+ required=True,
+ )
+ tts_parser.add_argument(
+ "--output_rvc_path",
+ type=str,
+ help="Full path to save the voice-converted audio using the synthesized TTS.",
+ required=True,
+ )
+ tts_parser.add_argument(
+ "--pth_path", type=str, help=pth_path_description, required=True
+ )
+ tts_parser.add_argument(
+ "--index_path", type=str, help=index_path_description, required=True
+ )
+ tts_parser.add_argument(
+ "--split_audio",
+ type=lambda x: bool(strtobool(x)),
+ choices=[True, False],
+ help=split_audio_description,
+ default=False,
+ )
+ tts_parser.add_argument(
+ "--f0_autotune",
+ type=lambda x: bool(strtobool(x)),
+ choices=[True, False],
+ help=f0_autotune_description,
+ default=False,
+ )
+ tts_parser.add_argument(
+ "--clean_audio",
+ type=lambda x: bool(strtobool(x)),
+ choices=[True, False],
+ help=clean_audio_description,
+ default=False,
+ )
+ tts_parser.add_argument(
+ "--clean_strength",
+ type=float,
+ help=clean_strength_description,
+ choices=[(i / 10) for i in range(11)],
+ default=0.7,
+ )
+ tts_parser.add_argument(
+ "--export_format",
+ type=str,
+ help=export_format_description,
+ choices=["WAV", "MP3", "FLAC", "OGG", "M4A"],
+ default="WAV",
+ )
+ tts_parser.add_argument(
+ "--embedder_model",
+ type=str,
+ help=embedder_model_description,
+ choices=[
+ "contentvec",
+ "chinese-hubert-base",
+ "japanese-hubert-base",
+ "korean-hubert-base",
+ "custom",
+ ],
+ default="contentvec",
+ )
+ tts_parser.add_argument(
+ "--embedder_model_custom",
+ type=str,
+ help=embedder_model_custom_description,
+ default=None,
+ )
+ tts_parser.add_argument(
+ "--upscale_audio",
+ type=lambda x: bool(strtobool(x)),
+ choices=[True, False],
+ help=upscale_audio_description,
+ default=False,
+ )
+ tts_parser.add_argument(
+ "--f0_file",
+ type=str,
+ help=f0_file_description,
+ default=None,
+ )
+
+ # Parser for 'preprocess' mode
+ preprocess_parser = subparsers.add_parser(
+ "preprocess", help="Preprocess a dataset for training."
+ )
+ preprocess_parser.add_argument(
+ "--model_name", type=str, help="Name of the model to be trained.", required=True
+ )
+ preprocess_parser.add_argument(
+ "--dataset_path", type=str, help="Path to the dataset directory.", required=True
+ )
+ preprocess_parser.add_argument(
+ "--sample_rate",
+ type=int,
+ help="Target sampling rate for the audio data.",
+ choices=[32000, 40000, 48000],
+ required=True,
+ )
+ preprocess_parser.add_argument(
+ "--cpu_cores",
+ type=int,
+ help="Number of CPU cores to use for preprocessing.",
+ choices=range(1, 65),
+ )
+ preprocess_parser.add_argument(
+ "--cut_preprocess",
+ type=lambda x: bool(strtobool(x)),
+ choices=[True, False],
+ help="Cut the dataset into smaller segments for faster preprocessing.",
+ default=True,
+ required=False,
+ )
+ preprocess_parser.add_argument(
+ "--process_effects",
+ type=lambda x: bool(strtobool(x)),
+ choices=[True, False],
+ help="Disable all filters during preprocessing.",
+ default=False,
+ required=False,
+ )
+
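+    # Example invocation (hypothetical model name and dataset path):
+    #   python core.py preprocess --model_name my_voice \
+    #     --dataset_path ./dataset --sample_rate 40000 --cpu_cores 4
+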
+ # Parser for 'extract' mode
+ extract_parser = subparsers.add_parser(
+ "extract", help="Extract features from a dataset."
+ )
+ extract_parser.add_argument(
+ "--model_name", type=str, help="Name of the model.", required=True
+ )
+ extract_parser.add_argument(
+ "--rvc_version",
+ type=str,
+ help="Version of the RVC model ('v1' or 'v2').",
+ choices=["v1", "v2"],
+ default="v2",
+ )
+ extract_parser.add_argument(
+ "--f0_method",
+ type=str,
+ help="Pitch extraction method to use.",
+ choices=[
+ "crepe",
+ "crepe-tiny",
+ "rmvpe",
+ ],
+ default="rmvpe",
+ )
+ extract_parser.add_argument(
+ "--pitch_guidance",
+ type=lambda x: bool(strtobool(x)),
+ choices=[True, False],
+ help="Enable or disable pitch guidance during feature extraction.",
+ default=True,
+ )
+ extract_parser.add_argument(
+ "--hop_length",
+ type=int,
+ help="Hop length for feature extraction. Only applicable for Crepe pitch extraction.",
+ choices=range(1, 513),
+ default=128,
+ )
+ extract_parser.add_argument(
+ "--cpu_cores",
+ type=int,
+ help="Number of CPU cores to use for feature extraction (optional).",
+ choices=range(1, 65),
+ default=None,
+ )
+ extract_parser.add_argument(
+ "--gpu",
+        type=str,
+        help="GPU device to use for feature extraction, e.g. '0' (optional).",
+        default="-",
+ )
+ extract_parser.add_argument(
+ "--sample_rate",
+ type=int,
+ help="Target sampling rate for the audio data.",
+ choices=[32000, 40000, 48000],
+ required=True,
+ )
+ extract_parser.add_argument(
+ "--embedder_model",
+ type=str,
+ help=embedder_model_description,
+ choices=[
+ "contentvec",
+ "chinese-hubert-base",
+ "japanese-hubert-base",
+ "korean-hubert-base",
+ "custom",
+ ],
+ default="contentvec",
+ )
+ extract_parser.add_argument(
+ "--embedder_model_custom",
+ type=str,
+ help=embedder_model_custom_description,
+ default=None,
+ )
+
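+    # Example invocation (assumes 'preprocess' was already run for this model):
+    #   python core.py extract --model_name my_voice --rvc_version v2 \
+    #     --f0_method rmvpe --sample_rate 40000 --gpu 0
+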
+ # Parser for 'train' mode
+ train_parser = subparsers.add_parser("train", help="Train an RVC model.")
+ train_parser.add_argument(
+ "--model_name", type=str, help="Name of the model to be trained.", required=True
+ )
+ train_parser.add_argument(
+ "--rvc_version",
+ type=str,
+ help="Version of the RVC model to train ('v1' or 'v2').",
+ choices=["v1", "v2"],
+ default="v2",
+ )
+ train_parser.add_argument(
+ "--save_every_epoch",
+ type=int,
+ help="Save the model every specified number of epochs.",
+ choices=range(1, 101),
+ required=True,
+ )
+ train_parser.add_argument(
+ "--save_only_latest",
+ type=lambda x: bool(strtobool(x)),
+ choices=[True, False],
+ help="Save only the latest model checkpoint.",
+ default=False,
+ )
+ train_parser.add_argument(
+ "--save_every_weights",
+ type=lambda x: bool(strtobool(x)),
+ choices=[True, False],
+ help="Save model weights every epoch.",
+ default=True,
+ )
+ train_parser.add_argument(
+ "--total_epoch",
+ type=int,
+ help="Total number of epochs to train for.",
+ choices=range(1, 10001),
+ default=1000,
+ )
+ train_parser.add_argument(
+ "--sample_rate",
+ type=int,
+ help="Sampling rate of the training data.",
+ choices=[32000, 40000, 48000],
+ required=True,
+ )
+ train_parser.add_argument(
+ "--batch_size",
+ type=int,
+ help="Batch size for training.",
+ choices=range(1, 51),
+ default=8,
+ )
+ train_parser.add_argument(
+ "--gpu",
+ type=str,
+ help="GPU device to use for training (e.g., '0').",
+ default="0",
+ )
+ train_parser.add_argument(
+ "--pitch_guidance",
+ type=lambda x: bool(strtobool(x)),
+ choices=[True, False],
+ help="Enable or disable pitch guidance during training.",
+ default=True,
+ )
+ train_parser.add_argument(
+ "--pretrained",
+ type=lambda x: bool(strtobool(x)),
+ choices=[True, False],
+ help="Use a pretrained model for initialization.",
+ default=True,
+ )
+ train_parser.add_argument(
+ "--custom_pretrained",
+ type=lambda x: bool(strtobool(x)),
+ choices=[True, False],
+ help="Use a custom pretrained model.",
+ default=False,
+ )
+ train_parser.add_argument(
+ "--g_pretrained_path",
+ type=str,
+ nargs="?",
+ default=None,
+ help="Path to the pretrained generator model file.",
+ )
+ train_parser.add_argument(
+ "--d_pretrained_path",
+ type=str,
+ nargs="?",
+ default=None,
+ help="Path to the pretrained discriminator model file.",
+ )
+ train_parser.add_argument(
+ "--overtraining_detector",
+ type=lambda x: bool(strtobool(x)),
+ choices=[True, False],
+ help="Enable overtraining detection.",
+ default=False,
+ )
+ train_parser.add_argument(
+ "--overtraining_threshold",
+ type=int,
+ help="Threshold for overtraining detection.",
+ choices=range(1, 101),
+ default=50,
+ )
+ train_parser.add_argument(
+ "--sync_graph",
+ type=lambda x: bool(strtobool(x)),
+ choices=[True, False],
+ help="Enable graph synchronization for distributed training.",
+ default=False,
+ )
+ train_parser.add_argument(
+ "--cache_data_in_gpu",
+ type=lambda x: bool(strtobool(x)),
+ choices=[True, False],
+ help="Cache training data in GPU memory.",
+ default=False,
+ )
+ train_parser.add_argument(
+ "--index_algorithm",
+ type=str,
+ choices=["Auto", "Faiss", "KMeans"],
+ help="Choose the method for generating the index file.",
+ default="Auto",
+ required=False,
+ )
+
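+    # Example invocation (hypothetical; requires preprocessed and extracted data):
+    #   python core.py train --model_name my_voice --sample_rate 40000 \
+    #     --save_every_epoch 10 --total_epoch 500 --batch_size 8 --gpu 0
+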
+ # Parser for 'index' mode
+ index_parser = subparsers.add_parser(
+ "index", help="Generate an index file for an RVC model."
+ )
+ index_parser.add_argument(
+ "--model_name", type=str, help="Name of the model.", required=True
+ )
+ index_parser.add_argument(
+ "--rvc_version",
+ type=str,
+ help="Version of the RVC model ('v1' or 'v2').",
+ choices=["v1", "v2"],
+ default="v2",
+ )
+ index_parser.add_argument(
+ "--index_algorithm",
+ type=str,
+ choices=["Auto", "Faiss", "KMeans"],
+ help="Choose the method for generating the index file.",
+ default="Auto",
+ required=False,
+ )
+
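+    # Example invocation:
+    #   python core.py index --model_name my_voice --rvc_version v2
+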
+ # Parser for 'model_extract' mode
+ model_extract_parser = subparsers.add_parser(
+ "model_extract", help="Extract a specific epoch from a trained model."
+ )
+ model_extract_parser.add_argument(
+ "--pth_path", type=str, help="Path to the main .pth model file.", required=True
+ )
+ model_extract_parser.add_argument(
+ "--model_name", type=str, help="Name of the model.", required=True
+ )
+ model_extract_parser.add_argument(
+ "--sample_rate",
+ type=int,
+ help="Sampling rate of the extracted model.",
+ choices=[32000, 40000, 48000],
+ required=True,
+ )
+ model_extract_parser.add_argument(
+ "--pitch_guidance",
+ type=lambda x: bool(strtobool(x)),
+ choices=[True, False],
+ help="Enable or disable pitch guidance for the extracted model.",
+ required=True,
+ )
+ model_extract_parser.add_argument(
+ "--rvc_version",
+ type=str,
+ help="Version of the extracted RVC model ('v1' or 'v2').",
+ choices=["v1", "v2"],
+ default="v2",
+ )
+ model_extract_parser.add_argument(
+ "--epoch",
+ type=int,
+ help="Epoch number to extract from the model.",
+ choices=range(1, 10001),
+ required=True,
+ )
+ model_extract_parser.add_argument(
+ "--step",
+ type=str,
+ help="Step number to extract from the model (optional).",
+ required=False,
+ )
+
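+    # Example invocation (hypothetical checkpoint path):
+    #   python core.py model_extract --pth_path logs/my_voice/G_1000.pth \
+    #     --model_name my_voice --sample_rate 40000 --pitch_guidance True --epoch 100
+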
+ # Parser for 'model_information' mode
+ model_information_parser = subparsers.add_parser(
+ "model_information", help="Display information about a trained model."
+ )
+ model_information_parser.add_argument(
+ "--pth_path", type=str, help="Path to the .pth model file.", required=True
+ )
+
+ # Parser for 'model_blender' mode
+ model_blender_parser = subparsers.add_parser(
+ "model_blender", help="Fuse two RVC models together."
+ )
+ model_blender_parser.add_argument(
+ "--model_name", type=str, help="Name of the new fused model.", required=True
+ )
+ model_blender_parser.add_argument(
+ "--pth_path_1",
+ type=str,
+ help="Path to the first .pth model file.",
+ required=True,
+ )
+ model_blender_parser.add_argument(
+ "--pth_path_2",
+ type=str,
+ help="Path to the second .pth model file.",
+ required=True,
+ )
+ model_blender_parser.add_argument(
+ "--ratio",
+ type=float,
+ help="Ratio for blending the two models (0.0 to 1.0).",
+ choices=[(i / 10) for i in range(11)],
+ default=0.5,
+ )
+
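+    # Example invocation (hypothetical model paths):
+    #   python core.py model_blender --model_name fused_voice \
+    #     --pth_path_1 a.pth --pth_path_2 b.pth --ratio 0.5
+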
+ # Parser for 'tensorboard' mode
+ subparsers.add_parser(
+ "tensorboard", help="Launch TensorBoard for monitoring training progress."
+ )
+
+ # Parser for 'download' mode
+ download_parser = subparsers.add_parser(
+ "download", help="Download a model from a provided link."
+ )
+ download_parser.add_argument(
+ "--model_link", type=str, help="Direct link to the model file.", required=True
+ )
+
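+    # Example invocation (hypothetical link):
+    #   python core.py download --model_link "https://example.com/model.zip"
+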
+ # Parser for 'prerequisites' mode
+ prerequisites_parser = subparsers.add_parser(
+ "prerequisites", help="Install prerequisites for RVC."
+ )
+ prerequisites_parser.add_argument(
+ "--pretraineds_v1",
+ type=lambda x: bool(strtobool(x)),
+ choices=[True, False],
+ default=True,
+ help="Download pretrained models for RVC v1.",
+ )
+ prerequisites_parser.add_argument(
+ "--pretraineds_v2",
+ type=lambda x: bool(strtobool(x)),
+ choices=[True, False],
+ default=True,
+ help="Download pretrained models for RVC v2.",
+ )
+ prerequisites_parser.add_argument(
+ "--models",
+ type=lambda x: bool(strtobool(x)),
+ choices=[True, False],
+ default=True,
+ help="Download additional models.",
+ )
+ prerequisites_parser.add_argument(
+ "--exe",
+ type=lambda x: bool(strtobool(x)),
+ choices=[True, False],
+ default=True,
+ help="Download required executables.",
+ )
+
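+    # Example invocation (all flags default to True):
+    #   python core.py prerequisites --pretraineds_v1 False --models True
+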
+ # Parser for 'audio_analyzer' mode
+ audio_analyzer = subparsers.add_parser(
+ "audio_analyzer", help="Analyze an audio file."
+ )
+ audio_analyzer.add_argument(
+ "--input_path", type=str, help="Path to the input audio file.", required=True
+ )
+
+ # Parser for 'api' mode
+ api_parser = subparsers.add_parser("api", help="Start the RVC API server.")
+ api_parser.add_argument(
+ "--host", type=str, help="Host address for the API server.", default="127.0.0.1"
+ )
+ api_parser.add_argument(
+ "--port", type=int, help="Port for the API server.", default=8000
+ )
+
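+    # Example invocation:
+    #   python core.py api --host 127.0.0.1 --port 8000
+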
+ return parser.parse_args()
+
+
+def main():
+ if len(sys.argv) == 1:
+ print("Please run the script with '-h' for more information.")
+ sys.exit(1)
+
+ args = parse_arguments()
+
+ try:
+ if args.mode == "infer":
+ run_infer_script(
+ pitch=args.pitch,
+ filter_radius=args.filter_radius,
+ index_rate=args.index_rate,
+ volume_envelope=args.volume_envelope,
+ protect=args.protect,
+ hop_length=args.hop_length,
+ f0_method=args.f0_method,
+ input_path=args.input_path,
+ output_path=args.output_path,
+ pth_path=args.pth_path,
+ index_path=args.index_path,
+ split_audio=args.split_audio,
+ f0_autotune=args.f0_autotune,
+ clean_audio=args.clean_audio,
+ clean_strength=args.clean_strength,
+ export_format=args.export_format,
+ embedder_model=args.embedder_model,
+ embedder_model_custom=args.embedder_model_custom,
+ upscale_audio=args.upscale_audio,
+ f0_file=args.f0_file,
+ )
+ elif args.mode == "batch_infer":
+ run_batch_infer_script(
+ pitch=args.pitch,
+ filter_radius=args.filter_radius,
+ index_rate=args.index_rate,
+ volume_envelope=args.volume_envelope,
+ protect=args.protect,
+ hop_length=args.hop_length,
+ f0_method=args.f0_method,
+ input_folder=args.input_folder,
+ output_folder=args.output_folder,
+ pth_path=args.pth_path,
+ index_path=args.index_path,
+ split_audio=args.split_audio,
+ f0_autotune=args.f0_autotune,
+ clean_audio=args.clean_audio,
+ clean_strength=args.clean_strength,
+ export_format=args.export_format,
+ embedder_model=args.embedder_model,
+ embedder_model_custom=args.embedder_model_custom,
+ upscale_audio=args.upscale_audio,
+ f0_file=args.f0_file,
+ )
+ elif args.mode == "tts":
+ run_tts_script(
+ tts_text=args.tts_text,
+ tts_voice=args.tts_voice,
+ tts_rate=args.tts_rate,
+ pitch=args.pitch,
+ filter_radius=args.filter_radius,
+ index_rate=args.index_rate,
+ volume_envelope=args.volume_envelope,
+ protect=args.protect,
+ hop_length=args.hop_length,
+ f0_method=args.f0_method,
+                output_tts_path=args.output_tts_path,
+                output_rvc_path=args.output_rvc_path,
+ pth_path=args.pth_path,
+ index_path=args.index_path,
+ split_audio=args.split_audio,
+ f0_autotune=args.f0_autotune,
+ clean_audio=args.clean_audio,
+ clean_strength=args.clean_strength,
+ export_format=args.export_format,
+ embedder_model=args.embedder_model,
+ embedder_model_custom=args.embedder_model_custom,
+ upscale_audio=args.upscale_audio,
+ f0_file=args.f0_file,
+ )
+ elif args.mode == "preprocess":
+ run_preprocess_script(
+ model_name=args.model_name,
+ dataset_path=args.dataset_path,
+ sample_rate=args.sample_rate,
+ cpu_cores=args.cpu_cores,
+ cut_preprocess=args.cut_preprocess,
+ process_effects=args.process_effects,
+ )
+ elif args.mode == "extract":
+ run_extract_script(
+ model_name=args.model_name,
+ rvc_version=args.rvc_version,
+ f0_method=args.f0_method,
+ pitch_guidance=args.pitch_guidance,
+ hop_length=args.hop_length,
+ cpu_cores=args.cpu_cores,
+ gpu=args.gpu,
+ sample_rate=args.sample_rate,
+ embedder_model=args.embedder_model,
+ embedder_model_custom=args.embedder_model_custom,
+ )
+ elif args.mode == "train":
+ run_train_script(
+ model_name=args.model_name,
+ rvc_version=args.rvc_version,
+ save_every_epoch=args.save_every_epoch,
+ save_only_latest=args.save_only_latest,
+ save_every_weights=args.save_every_weights,
+ total_epoch=args.total_epoch,
+ sample_rate=args.sample_rate,
+ batch_size=args.batch_size,
+ gpu=args.gpu,
+ pitch_guidance=args.pitch_guidance,
+ overtraining_detector=args.overtraining_detector,
+ overtraining_threshold=args.overtraining_threshold,
+ pretrained=args.pretrained,
+ custom_pretrained=args.custom_pretrained,
+ sync_graph=args.sync_graph,
+ index_algorithm=args.index_algorithm,
+ cache_data_in_gpu=args.cache_data_in_gpu,
+ g_pretrained_path=args.g_pretrained_path,
+ d_pretrained_path=args.d_pretrained_path,
+ )
+ elif args.mode == "index":
+ run_index_script(
+ model_name=args.model_name,
+ rvc_version=args.rvc_version,
+ index_algorithm=args.index_algorithm,
+ )
+ elif args.mode == "model_extract":
+ run_model_extract_script(
+ pth_path=args.pth_path,
+ model_name=args.model_name,
+ sample_rate=args.sample_rate,
+ pitch_guidance=args.pitch_guidance,
+ rvc_version=args.rvc_version,
+ epoch=args.epoch,
+ step=args.step,
+ )
+ elif args.mode == "model_information":
+ run_model_information_script(
+ pth_path=args.pth_path,
+ )
+ elif args.mode == "model_blender":
+ run_model_blender_script(
+ model_name=args.model_name,
+ pth_path_1=args.pth_path_1,
+ pth_path_2=args.pth_path_2,
+ ratio=args.ratio,
+ )
+ elif args.mode == "tensorboard":
+ run_tensorboard_script()
+ elif args.mode == "download":
+ run_download_script(
+ model_link=args.model_link,
+ )
+ elif args.mode == "prerequisites":
+ run_prerequisites_script(
+ pretraineds_v1=args.pretraineds_v1,
+ pretraineds_v2=args.pretraineds_v2,
+ models=args.models,
+ exe=args.exe,
+ )
+ elif args.mode == "audio_analyzer":
+ run_audio_analyzer_script(
+ input_path=args.input_path,
+ )
+ elif args.mode == "api":
+ run_api_script(
+ ip=args.host,
+ port=args.port,
+ )
+ except Exception as error:
+ print(f"An error occurred during execution: {error}")
+
+ import traceback
+
+ traceback.print_exc()
+
+
+if __name__ == "__main__":
+ main()
diff --git a/docker-compose.yaml b/docker-compose.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..c40724ead38d497a48b6f05bf591e0f72b7f2d8f
--- /dev/null
+++ b/docker-compose.yaml
@@ -0,0 +1,16 @@
+version: '3.8'
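+# Assumes the NVIDIA Container Toolkit is installed on the host.
+# Start with: docker compose up --build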
+
+services:
+ applio:
+ build:
+ context: ./
+ dockerfile: Dockerfile
+ ports:
+ - "6969"
+ deploy:
+ resources:
+ reservations:
+ devices:
+ - driver: nvidia
+ count: 1
+ capabilities: [gpu]
\ No newline at end of file
diff --git a/logs/mute/f0/mute.wav.npy b/logs/mute/f0/mute.wav.npy
new file mode 100644
index 0000000000000000000000000000000000000000..a7ecfbf9295b11a58fa1316e03ac3d0e85fa3ad6
Binary files /dev/null and b/logs/mute/f0/mute.wav.npy differ
diff --git a/logs/mute/f0_voiced/mute.wav.npy b/logs/mute/f0_voiced/mute.wav.npy
new file mode 100644
index 0000000000000000000000000000000000000000..cf5c21bd4c9bfca9d8a39708454eee44757e608d
Binary files /dev/null and b/logs/mute/f0_voiced/mute.wav.npy differ
diff --git a/logs/mute/sliced_audios/mute32000.wav b/logs/mute/sliced_audios/mute32000.wav
new file mode 100644
index 0000000000000000000000000000000000000000..b4b5029205bf72dee5856bbe0c65c34337dc8dd4
Binary files /dev/null and b/logs/mute/sliced_audios/mute32000.wav differ
diff --git a/logs/mute/sliced_audios/mute40000.wav b/logs/mute/sliced_audios/mute40000.wav
new file mode 100644
index 0000000000000000000000000000000000000000..fcf1281d4d1b01417a4d6738022f4402594a6802
Binary files /dev/null and b/logs/mute/sliced_audios/mute40000.wav differ
diff --git a/logs/mute/sliced_audios/mute48000.wav b/logs/mute/sliced_audios/mute48000.wav
new file mode 100644
index 0000000000000000000000000000000000000000..72822a01251e77d7d2a4a7da9d94805426829083
Binary files /dev/null and b/logs/mute/sliced_audios/mute48000.wav differ
diff --git a/logs/mute/sliced_audios_16k/mute.wav b/logs/mute/sliced_audios_16k/mute.wav
new file mode 100644
index 0000000000000000000000000000000000000000..27a7d638558539c521aacf8c0f34bd0d4816aa9d
Binary files /dev/null and b/logs/mute/sliced_audios_16k/mute.wav differ
diff --git a/logs/mute/v1_extracted/mute.npy b/logs/mute/v1_extracted/mute.npy
new file mode 100644
index 0000000000000000000000000000000000000000..ffe35e78401ba03731e2ebe48ed7a73d94198a8f
Binary files /dev/null and b/logs/mute/v1_extracted/mute.npy differ
diff --git a/logs/mute/v2_extracted/mute.npy b/logs/mute/v2_extracted/mute.npy
new file mode 100644
index 0000000000000000000000000000000000000000..b14cfb83e490f0abd05cc011fd1f9f2cf8595619
Binary files /dev/null and b/logs/mute/v2_extracted/mute.npy differ
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000000000000000000000000000000000000..36bd89a4686cb9b0a617efb74a04f974f9c264ae
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,49 @@
+# Core dependencies
+pip==23.3; sys_platform == 'darwin'
+wheel; sys_platform == 'darwin'
+PyYAML; sys_platform == 'darwin'
+numpy==1.23.5
+requests==2.32.0
+tqdm
+wget
+
+# Audio processing
+ffmpeg-python>=0.2.0
+faiss-cpu==1.7.3
+librosa==0.9.2
+pyworld==0.3.4
+scipy==1.11.1
+soundfile==0.12.1
+praat-parselmouth
+noisereduce
+audio_upscaler==0.1.4
+pedalboard
+
+# Machine learning and deep learning
+omegaconf==2.0.5; sys_platform == 'darwin'
+numba; sys_platform == 'linux'
+numba==0.57.0; sys_platform == 'darwin' or sys_platform == 'win32'
+torch==2.1.1
+torchaudio==2.1.1
+torchvision==0.16.1
+torchcrepe==0.0.23
+torchfcpe
+einops
+libf0
+transformers==4.44.2
+
+# Visualization and UI
+matplotlib==3.7.2
+tensorboard
+gradio==4.36.0
+
+# Miscellaneous utilities
+certifi==2024.7.4; sys_platform == 'darwin'
+antlr4-python3-runtime==4.8; sys_platform == 'darwin'
+ffmpy==0.3.1
+tensorboardX
+edge-tts==6.1.9
+pypresence
+beautifulsoup4
+flask
+local-attention
diff --git a/run-applio.sh b/run-applio.sh
new file mode 100755
index 0000000000000000000000000000000000000000..f4bd501e0b53fcd5fe7bd5b0105e6131c4fe82fb
--- /dev/null
+++ b/run-applio.sh
@@ -0,0 +1,9 @@
+#!/bin/sh
+printf "\033]0;Applio\007"
+. .venv/bin/activate
+
+export PYTORCH_ENABLE_MPS_FALLBACK=1
+export PYTORCH_MPS_HIGH_WATERMARK_RATIO=0.0
+
+clear
+python app.py --open
diff --git a/run-install.sh b/run-install.sh
new file mode 100755
index 0000000000000000000000000000000000000000..563df03ae9de2be6aeb3bbed85fa2862cf938da8
--- /dev/null
+++ b/run-install.sh
@@ -0,0 +1,85 @@
+#!/bin/sh
+printf "\033]0;Installer\007"
+clear
+rm *.bat
+
+# Function to create or activate a virtual environment
+prepare_install() {
+ if [ -d ".venv" ]; then
+ echo "Venv found. This implies Applio has been already installed or this is a broken install"
+ printf "Do you want to execute run-applio.sh? (Y/N): " >&2
+ read -r r
+ r=$(echo "$r" | tr '[:upper:]' '[:lower:]')
+ if [ "$r" = "y" ]; then
+            ./run-applio.sh && exit 0
+ else
+ echo "Ok! The installation will continue. Good luck!"
+ fi
+ . .venv/bin/activate
+ else
+ echo "Creating venv..."
+ requirements_file="requirements.txt"
+ echo "Checking if python exists"
+ if command -v python3.10 > /dev/null 2>&1; then
+ py=$(which python3.10)
+ echo "Using python3.10"
+ else
+ if python --version | grep -qE "3\.(7|8|9|10)\."; then
+ py=$(which python)
+ echo "Using python"
+ else
+ echo "Please install Python3 or 3.10 manually."
+ exit 1
+ fi
+ fi
+ $py -m venv .venv
+ . .venv/bin/activate
+ echo "Installing pip version less than 24.1..."
+ python -m pip install "pip<24.1"
+ echo
+ echo "Installing Applio dependencies..."
+ python -m pip install -r requirements.txt
+ python -m pip uninstall torch torchvision torchaudio -y
+ python -m pip install torch==2.1.1 torchvision==0.16.1 torchaudio==2.1.1 --index-url https://download.pytorch.org/whl/cu121
+ finish
+ fi
+}
+
+# Function to finish installation (this should install missing dependencies)
+finish() {
+ # Check if required packages are installed and install them if not
+ if [ -f "${requirements_file}" ]; then
+ installed_packages=$(python -m pip freeze)
+ while IFS= read -r package; do
+ expr "${package}" : "^#.*" > /dev/null && continue
+ package_name=$(echo "${package}" | sed 's/[<>=!].*//')
+ if ! echo "${installed_packages}" | grep -q "${package_name}"; then
+ echo "${package_name} not found. Attempting to install..."
+ python -m pip install --upgrade "${package}"
+ fi
+ done < "${requirements_file}"
+ else
+ echo "${requirements_file} not found. Please ensure the requirements file with required packages exists."
+ exit 1
+ fi
+ clear
+ echo "Applio has been successfully downloaded. Run the file run-applio.sh to run the web interface!"
+ exit 0
+}
+
+# Loop to the main menu
+if [ "$(uname)" = "Darwin" ]; then
+ if ! command -v brew >/dev/null 2>&1; then
+ /bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/HEAD/install.sh)"
+ else
+ brew install python@3.10
+ export PYTORCH_ENABLE_MPS_FALLBACK=1
+ export PYTORCH_MPS_HIGH_WATERMARK_RATIO=0.0
+ fi
+elif [ "$(uname)" != "Linux" ]; then
+ echo "Unsupported operating system. Are you using Windows...?"
+ echo "If yes, use the batch (.bat) file instead of this one!"
+ exit 1
+fi
+
+prepare_install
diff --git a/run-tensorboard.sh b/run-tensorboard.sh
new file mode 100644
index 0000000000000000000000000000000000000000..26e98b58beed3b1272b0313455d75785f83fa0d0
--- /dev/null
+++ b/run-tensorboard.sh
@@ -0,0 +1,6 @@
+#!/bin/sh
+printf "\033]0;Tensorboard\007"
+. .venv/bin/activate
+
+clear
+python core.py tensorboard
\ No newline at end of file
diff --git a/rvc/configs/config.py b/rvc/configs/config.py
new file mode 100644
index 0000000000000000000000000000000000000000..51eb0e2bb72063101359b5d920455c79b48e4e6c
--- /dev/null
+++ b/rvc/configs/config.py
@@ -0,0 +1,186 @@
+import torch
+import json
+import os
+
+
+version_config_paths = [
+ os.path.join("v1", "32000.json"),
+ os.path.join("v1", "40000.json"),
+ os.path.join("v1", "48000.json"),
+ os.path.join("v2", "48000.json"),
+ os.path.join("v2", "40000.json"),
+ os.path.join("v2", "32000.json"),
+]
+
+
+def singleton(cls):
+ instances = {}
+
+ def get_instance(*args, **kwargs):
+ if cls not in instances:
+ instances[cls] = cls(*args, **kwargs)
+ return instances[cls]
+
+ return get_instance
+
+
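+# Applied to Config below so that Config() always returns one shared instance.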
+@singleton
+class Config:
+ def __init__(self):
+ self.device = "cuda:0" if torch.cuda.is_available() else "cpu"
+ self.is_half = self.device != "cpu"
+ self.gpu_name = (
+ torch.cuda.get_device_name(int(self.device.split(":")[-1]))
+ if self.device.startswith("cuda")
+ else None
+ )
+ self.json_config = self.load_config_json()
+ self.gpu_mem = None
+ self.x_pad, self.x_query, self.x_center, self.x_max = self.device_config()
+
+ def load_config_json(self) -> dict:
+ configs = {}
+ for config_file in version_config_paths:
+ config_path = os.path.join("rvc", "configs", config_file)
+ with open(config_path, "r") as f:
+ configs[config_file] = json.load(f)
+ return configs
+
+ def has_mps(self) -> bool:
+ # Check if Metal Performance Shaders are available - for macOS 12.3+.
+ return torch.backends.mps.is_available()
+
+ def has_xpu(self) -> bool:
+ # Check if XPU is available.
+ return hasattr(torch, "xpu") and torch.xpu.is_available()
+
+ def set_precision(self, precision):
+ if precision not in ["fp32", "fp16"]:
+ raise ValueError("Invalid precision type. Must be 'fp32' or 'fp16'.")
+
+ fp16_run_value = precision == "fp16"
+ preprocess_target_version = "3.7" if precision == "fp16" else "3.0"
+        preprocess_path = os.path.join(
+            os.path.dirname(__file__),
+            os.pardir,
+            "train",
+            "preprocess",
+            "preprocess.py",
+        )
+
+ for config_path in version_config_paths:
+ full_config_path = os.path.join("rvc", "configs", config_path)
+ try:
+ with open(full_config_path, "r") as f:
+ config = json.load(f)
+ config["train"]["fp16_run"] = fp16_run_value
+ with open(full_config_path, "w") as f:
+ json.dump(config, f, indent=4)
+ except FileNotFoundError:
+ print(f"File not found: {full_config_path}")
+
+ if os.path.exists(preprocess_path):
+ with open(preprocess_path, "r") as f:
+ preprocess_content = f.read()
+ preprocess_content = preprocess_content.replace(
+ "3.0" if precision == "fp16" else "3.7", preprocess_target_version
+ )
+ with open(preprocess_path, "w") as f:
+ f.write(preprocess_content)
+
+ return f"Overwritten preprocess and config.json to use {precision}."
+
+ def get_precision(self):
+ if not version_config_paths:
+ raise FileNotFoundError("No configuration paths provided.")
+
+ full_config_path = os.path.join("rvc", "configs", version_config_paths[0])
+ try:
+ with open(full_config_path, "r") as f:
+ config = json.load(f)
+ fp16_run_value = config["train"].get("fp16_run", False)
+ precision = "fp16" if fp16_run_value else "fp32"
+ return precision
+ except FileNotFoundError:
+ print(f"File not found: {full_config_path}")
+ return None
+
+ def device_config(self) -> tuple:
+ if self.device.startswith("cuda"):
+ self.set_cuda_config()
+ elif self.has_mps():
+ self.device = "mps"
+ self.is_half = False
+ self.set_precision("fp32")
+ else:
+ self.device = "cpu"
+ self.is_half = False
+ self.set_precision("fp32")
+
+        # Window sizes for GPUs with roughly 6 GB of memory or more
+        x_pad, x_query, x_center, x_max = (
+            (3, 10, 60, 65) if self.is_half else (1, 6, 38, 41)
+        )
+        if self.gpu_mem is not None and self.gpu_mem <= 4:
+            # Tighter window sizes for GPUs with 4 GB of memory or less
+            x_pad, x_query, x_center, x_max = (1, 5, 30, 32)
+
+ return x_pad, x_query, x_center, x_max
+
+ def set_cuda_config(self):
+ i_device = int(self.device.split(":")[-1])
+ self.gpu_name = torch.cuda.get_device_name(i_device)
+ # Zluda
+ if self.gpu_name.endswith("[ZLUDA]"):
+ print("Zluda compatibility enabled, experimental feature.")
+ torch.backends.cudnn.enabled = False
+ torch.backends.cuda.enable_flash_sdp(False)
+ torch.backends.cuda.enable_math_sdp(True)
+ torch.backends.cuda.enable_mem_efficient_sdp(False)
+ low_end_gpus = ["16", "P40", "P10", "1060", "1070", "1080"]
+ if (
+ any(gpu in self.gpu_name for gpu in low_end_gpus)
+ and "V100" not in self.gpu_name.upper()
+ ):
+ self.is_half = False
+ self.set_precision("fp32")
+
+ self.gpu_mem = torch.cuda.get_device_properties(i_device).total_memory // (
+ 1024**3
+ )
+
+
+def max_vram_gpu(gpu):
+ if torch.cuda.is_available():
+ gpu_properties = torch.cuda.get_device_properties(gpu)
+ total_memory_gb = round(gpu_properties.total_memory / 1024 / 1024 / 1024)
+ return total_memory_gb
+ else:
+ return "0"
+
+
+def get_gpu_info():
+ ngpu = torch.cuda.device_count()
+ gpu_infos = []
+ if torch.cuda.is_available() or ngpu != 0:
+ for i in range(ngpu):
+ gpu_name = torch.cuda.get_device_name(i)
+ mem = int(
+ torch.cuda.get_device_properties(i).total_memory / 1024 / 1024 / 1024
+ + 0.4
+ )
+ gpu_infos.append(f"{i}: {gpu_name} ({mem} GB)")
+ if len(gpu_infos) > 0:
+ gpu_info = "\n".join(gpu_infos)
+ else:
+ gpu_info = "Unfortunately, there is no compatible GPU available to support your training."
+ return gpu_info
+
+
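+# Returns CUDA device indices joined by '-' (e.g. "0-1" for two GPUs), or "-" if none.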
+def get_number_of_gpus():
+ if torch.cuda.is_available():
+ num_gpus = torch.cuda.device_count()
+ return "-".join(map(str, range(num_gpus)))
+ else:
+ return "-"
diff --git a/rvc/configs/v1/32000.json b/rvc/configs/v1/32000.json
new file mode 100644
index 0000000000000000000000000000000000000000..2f28f4f68083acbca0d2d7864aaed24d67df7f53
--- /dev/null
+++ b/rvc/configs/v1/32000.json
@@ -0,0 +1,47 @@
+{
+ "train": {
+ "log_interval": 200,
+ "seed": 1234,
+ "epochs": 20000,
+ "learning_rate": 1e-4,
+ "betas": [0.8, 0.99],
+ "eps": 1e-9,
+ "batch_size": 4,
+ "fp16_run": true,
+ "lr_decay": 0.999875,
+ "segment_size": 12800,
+ "init_lr_ratio": 1,
+ "warmup_epochs": 0,
+ "c_mel": 45,
+ "c_kl": 1.0
+ },
+ "data": {
+ "max_wav_value": 32768.0,
+ "sample_rate": 32000,
+ "filter_length": 1024,
+ "hop_length": 320,
+ "win_length": 1024,
+ "n_mel_channels": 80,
+ "mel_fmin": 0.0,
+ "mel_fmax": null
+ },
+ "model": {
+ "inter_channels": 192,
+ "hidden_channels": 192,
+ "filter_channels": 768,
+ "text_enc_hidden_dim": 256,
+ "n_heads": 2,
+ "n_layers": 6,
+ "kernel_size": 3,
+ "p_dropout": 0,
+ "resblock": "1",
+ "resblock_kernel_sizes": [3,7,11],
+ "resblock_dilation_sizes": [[1,3,5], [1,3,5], [1,3,5]],
+ "upsample_rates": [10,4,2,2,2],
+ "upsample_initial_channel": 512,
+ "upsample_kernel_sizes": [16,16,4,4,4],
+ "use_spectral_norm": false,
+ "gin_channels": 256,
+ "spk_embed_dim": 109
+ }
+}
diff --git a/rvc/configs/v1/40000.json b/rvc/configs/v1/40000.json
new file mode 100644
index 0000000000000000000000000000000000000000..3961ddb6412c3a8c4310ec965f1fd20e3622d2f4
--- /dev/null
+++ b/rvc/configs/v1/40000.json
@@ -0,0 +1,47 @@
+{
+ "train": {
+ "log_interval": 200,
+ "seed": 1234,
+ "epochs": 20000,
+ "learning_rate": 1e-4,
+ "betas": [0.8, 0.99],
+ "eps": 1e-9,
+ "batch_size": 4,
+ "fp16_run": true,
+ "lr_decay": 0.999875,
+ "segment_size": 12800,
+ "init_lr_ratio": 1,
+ "warmup_epochs": 0,
+ "c_mel": 45,
+ "c_kl": 1.0
+ },
+ "data": {
+ "max_wav_value": 32768.0,
+ "sample_rate": 40000,
+ "filter_length": 2048,
+ "hop_length": 400,
+ "win_length": 2048,
+ "n_mel_channels": 125,
+ "mel_fmin": 0.0,
+ "mel_fmax": null
+ },
+ "model": {
+ "inter_channels": 192,
+ "hidden_channels": 192,
+ "filter_channels": 768,
+ "text_enc_hidden_dim": 256,
+ "n_heads": 2,
+ "n_layers": 6,
+ "kernel_size": 3,
+ "p_dropout": 0,
+ "resblock": "1",
+ "resblock_kernel_sizes": [3,7,11],
+ "resblock_dilation_sizes": [[1,3,5], [1,3,5], [1,3,5]],
+ "upsample_rates": [10,10,2,2],
+ "upsample_initial_channel": 512,
+ "upsample_kernel_sizes": [16,16,4,4],
+ "use_spectral_norm": false,
+ "gin_channels": 256,
+ "spk_embed_dim": 109
+ }
+}
diff --git a/rvc/configs/v1/48000.json b/rvc/configs/v1/48000.json
new file mode 100644
index 0000000000000000000000000000000000000000..41ea3b62f5c575d370ca1b8a66755959402950cb
--- /dev/null
+++ b/rvc/configs/v1/48000.json
@@ -0,0 +1,47 @@
+{
+ "train": {
+ "log_interval": 200,
+ "seed": 1234,
+ "epochs": 20000,
+ "learning_rate": 1e-4,
+ "betas": [0.8, 0.99],
+ "eps": 1e-9,
+ "batch_size": 4,
+ "fp16_run": true,
+ "lr_decay": 0.999875,
+ "segment_size": 11520,
+ "init_lr_ratio": 1,
+ "warmup_epochs": 0,
+ "c_mel": 45,
+ "c_kl": 1.0
+ },
+ "data": {
+ "max_wav_value": 32768.0,
+ "sample_rate": 48000,
+ "filter_length": 2048,
+ "hop_length": 480,
+ "win_length": 2048,
+ "n_mel_channels": 128,
+ "mel_fmin": 0.0,
+ "mel_fmax": null
+ },
+ "model": {
+ "inter_channels": 192,
+ "hidden_channels": 192,
+ "filter_channels": 768,
+ "text_enc_hidden_dim": 256,
+ "n_heads": 2,
+ "n_layers": 6,
+ "kernel_size": 3,
+ "p_dropout": 0,
+ "resblock": "1",
+ "resblock_kernel_sizes": [3,7,11],
+ "resblock_dilation_sizes": [[1,3,5], [1,3,5], [1,3,5]],
+ "upsample_rates": [10,6,2,2,2],
+ "upsample_initial_channel": 512,
+ "upsample_kernel_sizes": [16,16,4,4,4],
+ "use_spectral_norm": false,
+ "gin_channels": 256,
+ "spk_embed_dim": 109
+ }
+}
diff --git a/rvc/configs/v2/32000.json b/rvc/configs/v2/32000.json
new file mode 100644
index 0000000000000000000000000000000000000000..eabab7b5317c3b47963bc1f7ad4c1c002dbf1939
--- /dev/null
+++ b/rvc/configs/v2/32000.json
@@ -0,0 +1,43 @@
+{
+ "train": {
+ "log_interval": 200,
+ "seed": 1234,
+ "learning_rate": 1e-4,
+ "betas": [0.8, 0.99],
+ "eps": 1e-9,
+ "fp16_run": true,
+ "lr_decay": 0.999875,
+ "segment_size": 12800,
+ "c_mel": 45,
+ "c_kl": 1.0
+ },
+ "data": {
+ "max_wav_value": 32768.0,
+ "sample_rate": 32000,
+ "filter_length": 1024,
+ "hop_length": 320,
+ "win_length": 1024,
+ "n_mel_channels": 80,
+ "mel_fmin": 0.0,
+ "mel_fmax": null
+ },
+ "model": {
+ "inter_channels": 192,
+ "hidden_channels": 192,
+ "filter_channels": 768,
+ "text_enc_hidden_dim": 768,
+ "n_heads": 2,
+ "n_layers": 6,
+ "kernel_size": 3,
+ "p_dropout": 0,
+ "resblock": "1",
+ "resblock_kernel_sizes": [3,7,11],
+ "resblock_dilation_sizes": [[1,3,5], [1,3,5], [1,3,5]],
+ "upsample_rates": [10,8,2,2],
+ "upsample_initial_channel": 512,
+ "upsample_kernel_sizes": [20,16,4,4],
+ "use_spectral_norm": false,
+ "gin_channels": 256,
+ "spk_embed_dim": 109
+ }
+}
diff --git a/rvc/configs/v2/40000.json b/rvc/configs/v2/40000.json
new file mode 100644
index 0000000000000000000000000000000000000000..e1ba44a9c0cfadb57d0fab15a62a4cf40872ffe8
--- /dev/null
+++ b/rvc/configs/v2/40000.json
@@ -0,0 +1,43 @@
+{
+ "train": {
+ "log_interval": 200,
+ "seed": 1234,
+ "learning_rate": 1e-4,
+ "betas": [0.8, 0.99],
+ "eps": 1e-9,
+ "fp16_run": true,
+ "lr_decay": 0.999875,
+ "segment_size": 12800,
+ "c_mel": 45,
+ "c_kl": 1.0
+ },
+ "data": {
+ "max_wav_value": 32768.0,
+ "sample_rate": 40000,
+ "filter_length": 2048,
+ "hop_length": 400,
+ "win_length": 2048,
+ "n_mel_channels": 125,
+ "mel_fmin": 0.0,
+ "mel_fmax": null
+ },
+ "model": {
+ "inter_channels": 192,
+ "hidden_channels": 192,
+ "filter_channels": 768,
+ "text_enc_hidden_dim": 768,
+ "n_heads": 2,
+ "n_layers": 6,
+ "kernel_size": 3,
+ "p_dropout": 0,
+ "resblock": "1",
+ "resblock_kernel_sizes": [3,7,11],
+ "resblock_dilation_sizes": [[1,3,5], [1,3,5], [1,3,5]],
+ "upsample_rates": [10,10,2,2],
+ "upsample_initial_channel": 512,
+ "upsample_kernel_sizes": [16,16,4,4],
+ "use_spectral_norm": false,
+ "gin_channels": 256,
+ "spk_embed_dim": 109
+ }
+}
diff --git a/rvc/configs/v2/48000.json b/rvc/configs/v2/48000.json
new file mode 100644
index 0000000000000000000000000000000000000000..1a4da9f5c669d3a39644b7a8ae827ca454c2cb3f
--- /dev/null
+++ b/rvc/configs/v2/48000.json
@@ -0,0 +1,43 @@
+{
+ "train": {
+ "log_interval": 200,
+ "seed": 1234,
+ "learning_rate": 1e-4,
+ "betas": [0.8, 0.99],
+ "eps": 1e-9,
+ "fp16_run": true,
+ "lr_decay": 0.999875,
+ "segment_size": 17280,
+ "c_mel": 45,
+ "c_kl": 1.0
+ },
+ "data": {
+ "max_wav_value": 32768.0,
+ "sample_rate": 48000,
+ "filter_length": 2048,
+ "hop_length": 480,
+ "win_length": 2048,
+ "n_mel_channels": 128,
+ "mel_fmin": 0.0,
+ "mel_fmax": null
+ },
+ "model": {
+ "inter_channels": 192,
+ "hidden_channels": 192,
+ "filter_channels": 768,
+ "text_enc_hidden_dim": 768,
+ "n_heads": 2,
+ "n_layers": 6,
+ "kernel_size": 3,
+ "p_dropout": 0,
+ "resblock": "1",
+ "resblock_kernel_sizes": [3,7,11],
+ "resblock_dilation_sizes": [[1,3,5], [1,3,5], [1,3,5]],
+ "upsample_rates": [12,10,2,2],
+ "upsample_initial_channel": 512,
+ "upsample_kernel_sizes": [24,20,4,4],
+ "use_spectral_norm": false,
+ "gin_channels": 256,
+ "spk_embed_dim": 109
+ }
+}
diff --git a/rvc/infer/infer.py b/rvc/infer/infer.py
new file mode 100644
index 0000000000000000000000000000000000000000..56cfd6a842836820b73439ceef72b9f20b480e80
--- /dev/null
+++ b/rvc/infer/infer.py
@@ -0,0 +1,968 @@
+import os
+import sys
+import time
+import torch
+import librosa
+import logging
+import traceback
+import numpy as np
+import soundfile as sf
+import noisereduce as nr
+from pedalboard import (
+ Pedalboard,
+ Chorus,
+ Distortion,
+ Reverb,
+ PitchShift,
+ Limiter,
+ Gain,
+ Bitcrush,
+ Clipping,
+ Compressor,
+ Delay,
+)
+
+from scipy.io import wavfile
+from audio_upscaler import upscale
+
+now_dir = os.getcwd()
+sys.path.append(now_dir)
+
+from rvc.infer.pipeline import Pipeline as VC
+from rvc.lib.utils import load_audio_infer, load_embedding
+from rvc.lib.tools.split_audio import process_audio, merge_audio
+from rvc.lib.algorithm.synthesizers import Synthesizer
+from rvc.configs.config import Config
+
+logging.getLogger("httpx").setLevel(logging.WARNING)
+logging.getLogger("httpcore").setLevel(logging.WARNING)
+logging.getLogger("faiss").setLevel(logging.WARNING)
+logging.getLogger("faiss.loader").setLevel(logging.WARNING)
+
+
+class VoiceConverter:
+ """
+ A class for performing voice conversion using the Retrieval-Based Voice Conversion (RVC) method.
+ """
+
+ def __init__(self):
+ """
+ Initializes the VoiceConverter with default configuration, and sets up models and parameters.
+ """
+ self.config = Config() # Load RVC configuration
+        self.hubert_model = None  # HuBERT model for embedding extraction
+ self.last_embedder_model = None # Last used embedder model
+ self.tgt_sr = None # Target sampling rate for the output audio
+ self.net_g = None # Generator network for voice conversion
+ self.vc = None # Voice conversion pipeline instance
+ self.cpt = None # Checkpoint for loading model weights
+ self.version = None # Model version
+ self.n_spk = None # Number of speakers in the model
+ self.use_f0 = None # Whether the model uses F0
+
+ def load_hubert(self, embedder_model: str, embedder_model_custom: str = None):
+ """
+ Loads the HuBERT model for speaker embedding extraction.
+
+ Args:
+            embedder_model (str): Name of the pre-trained HuBERT embedder (e.g. "contentvec").
+            embedder_model_custom (str): Path to a custom HuBERT model (used when embedder_model is "custom").
+ """
+ self.hubert_model = load_embedding(embedder_model, embedder_model_custom)
+ self.hubert_model.to(self.config.device)
+ self.hubert_model = (
+ self.hubert_model.half()
+ if self.config.is_half
+ else self.hubert_model.float()
+ )
+ self.hubert_model.eval()
+
+ @staticmethod
+ def remove_audio_noise(input_audio_path, reduction_strength=0.7):
+ """
+ Removes noise from an audio file using the NoiseReduce library.
+
+ Args:
+ input_audio_path (str): Path to the input audio file.
+ reduction_strength (float): Strength of the noise reduction. Default is 0.7.
+ """
+ try:
+ rate, data = wavfile.read(input_audio_path)
+ reduced_noise = nr.reduce_noise(
+ y=data, sr=rate, prop_decrease=reduction_strength
+ )
+ return reduced_noise
+ except Exception as error:
+ print(f"An error occurred removing audio noise: {error}")
+ return None
+
+ @staticmethod
+ def convert_audio_format(input_path, output_path, output_format):
+ """
+ Converts an audio file to a specified output format.
+
+ Args:
+ input_path (str): Path to the input audio file.
+ output_path (str): Path to the output audio file.
+ output_format (str): Desired audio format (e.g., "WAV", "MP3").
+ """
+ try:
+ if output_format != "WAV":
+ print(f"Converting audio to {output_format} format...")
+ audio, sample_rate = librosa.load(input_path, sr=None)
+ common_sample_rates = [
+ 8000,
+ 11025,
+ 12000,
+ 16000,
+ 22050,
+ 24000,
+ 32000,
+ 44100,
+ 48000,
+ ]
+ target_sr = min(common_sample_rates, key=lambda x: abs(x - sample_rate))
+ audio = librosa.resample(
+ audio, orig_sr=sample_rate, target_sr=target_sr
+ )
+ sf.write(output_path, audio, target_sr, format=output_format.lower())
+ return output_path
+ except Exception as error:
+ print(f"An error occurred converting the audio format: {error}")
+
+ @staticmethod
+ def post_process_audio(
+ audio_input,
+ sample_rate,
+ reverb: bool,
+ reverb_room_size: float,
+ reverb_damping: float,
+ reverb_wet_level: float,
+ reverb_dry_level: float,
+ reverb_width: float,
+ reverb_freeze_mode: float,
+ pitch_shift: bool,
+ pitch_shift_semitones: int,
+ limiter: bool,
+ limiter_threshold: float,
+ limiter_release: float,
+ gain: bool,
+ gain_db: float,
+ distortion: bool,
+ distortion_gain: float,
+ chorus: bool,
+ chorus_rate: float,
+ chorus_depth: float,
+ chorus_delay: float,
+ chorus_feedback: float,
+ chorus_mix: float,
+ bitcrush: bool,
+ bitcrush_bit_depth: int,
+ clipping: bool,
+ clipping_threshold: float,
+ compressor: bool,
+ compressor_threshold: float,
+ compressor_ratio: float,
+ compressor_attack: float,
+ compressor_release: float,
+ delay: bool,
+ delay_seconds: float,
+ delay_feedback: float,
+ delay_mix: float,
+ audio_output_path: str,
+ ):
+ board = Pedalboard()
+ if reverb:
+ reverb = Reverb(
+ room_size=reverb_room_size,
+ damping=reverb_damping,
+ wet_level=reverb_wet_level,
+ dry_level=reverb_dry_level,
+ width=reverb_width,
+ freeze_mode=reverb_freeze_mode,
+ )
+ board.append(reverb)
+ if pitch_shift:
+ pitch_shift = PitchShift(semitones=pitch_shift_semitones)
+ board.append(pitch_shift)
+ if limiter:
+ limiter = Limiter(
+ threshold_db=limiter_threshold, release_ms=limiter_release
+ )
+ board.append(limiter)
+ if gain:
+ gain = Gain(gain_db=gain_db)
+ board.append(gain)
+ if distortion:
+ distortion = Distortion(drive_db=distortion_gain)
+ board.append(distortion)
+ if chorus:
+ chorus = Chorus(
+ rate_hz=chorus_rate,
+ depth=chorus_depth,
+ centre_delay_ms=chorus_delay,
+ feedback=chorus_feedback,
+ mix=chorus_mix,
+ )
+ board.append(chorus)
+ if bitcrush:
+ bitcrush = Bitcrush(bit_depth=bitcrush_bit_depth)
+ board.append(bitcrush)
+ if clipping:
+ clipping = Clipping(threshold_db=clipping_threshold)
+ board.append(clipping)
+ if compressor:
+ compressor = Compressor(
+ threshold_db=compressor_threshold,
+ ratio=compressor_ratio,
+ attack_ms=compressor_attack,
+ release_ms=compressor_release,
+ )
+ board.append(compressor)
+ if delay:
+ delay = Delay(
+ delay_seconds=delay_seconds,
+ feedback=delay_feedback,
+ mix=delay_mix,
+ )
+ board.append(delay)
+ audio_input, sample_rate = librosa.load(audio_input, sr=sample_rate)
+ output = board(audio_input, sample_rate)
+ sf.write(audio_output_path, output, sample_rate, format="WAV")
+ return audio_output_path
+
+ def convert_audio(
+ self,
+ audio_input_path: str,
+ audio_output_path: str,
+ model_path: str,
+ index_path: str,
+ embedder_model: str,
+ pitch: int,
+ f0_file: str,
+ f0_method: str,
+ index_rate: float,
+ volume_envelope: int,
+ protect: float,
+ hop_length: int,
+ split_audio: bool,
+ f0_autotune: bool,
+ filter_radius: int,
+ embedder_model_custom: str,
+ clean_audio: bool,
+ clean_strength: float,
+ export_format: str,
+ upscale_audio: bool,
+ formant_shifting: bool,
+ formant_qfrency: float,
+ formant_timbre: float,
+ post_process: bool,
+ reverb: bool,
+ pitch_shift: bool,
+ limiter: bool,
+ gain: bool,
+ distortion: bool,
+ chorus: bool,
+ bitcrush: bool,
+ clipping: bool,
+ compressor: bool,
+ delay: bool,
+ sliders: dict,
+ resample_sr: int = 0,
+ sid: int = 0,
+ ):
+ """
+ Performs voice conversion on the input audio.
+
+ Args:
+ audio_input_path (str): Path to the input audio file.
+ audio_output_path (str): Path to the output audio file.
+ model_path (str): Path to the voice conversion model.
+ index_path (str): Path to the index file.
+ sid (int, optional): Speaker ID. Default is 0.
+            pitch (int, optional): Pitch shift in semitones. Default is None.
+ f0_file (str, optional): Path to the F0 file. Default is None.
+ f0_method (str, optional): Method for F0 extraction. Default is None.
+ index_rate (float, optional): Rate for index matching. Default is None.
+ resample_sr (int, optional): Resample sampling rate. Default is 0.
+ volume_envelope (float, optional): RMS mix rate. Default is None.
+ protect (float, optional): Protection rate for certain audio segments. Default is None.
+ hop_length (int, optional): Hop length for audio processing. Default is None.
+ split_audio (bool, optional): Whether to split the audio for processing. Default is False.
+ f0_autotune (bool, optional): Whether to use F0 autotune. Default is False.
+ filter_radius (int, optional): Radius for filtering. Default is None.
+ embedder_model (str, optional): Path to the embedder model. Default is None.
+ embedder_model_custom (str, optional): Path to the custom embedder model. Default is None.
+ clean_audio (bool, optional): Whether to clean the audio. Default is False.
+ clean_strength (float, optional): Strength of the audio cleaning. Default is 0.7.
+ export_format (str, optional): Format for exporting the audio. Default is "WAV".
+ upscale_audio (bool, optional): Whether to upscale the audio. Default is False.
+            formant_shifting (bool, optional): Whether to shift the formants. Default is False.
+ formant_qfrency (float, optional): Formant frequency. Default is 1.0.
+ formant_timbre (float, optional): Formant timbre. Default is 1.0.
+ reverb (bool, optional): Whether to apply reverb. Default is False.
+ pitch_shift (bool, optional): Whether to apply pitch shift. Default is False.
+ limiter (bool, optional): Whether to apply a limiter. Default is False.
+ gain (bool, optional): Whether to apply gain. Default is False.
+ distortion (bool, optional): Whether to apply distortion. Default is False.
+ chorus (bool, optional): Whether to apply chorus. Default is False.
+ bitcrush (bool, optional): Whether to apply bitcrush. Default is False.
+ clipping (bool, optional): Whether to apply clipping. Default is False.
+ compressor (bool, optional): Whether to apply a compressor. Default is False.
+ delay (bool, optional): Whether to apply delay. Default is False.
+ sliders (dict, optional): Dictionary of effect parameters. Default is None.
+ """
+ self.get_vc(model_path, sid)
+
+ try:
+ start_time = time.time()
+ print(f"Converting audio '{audio_input_path}'...")
+
+            if upscale_audio:
+ upscale(audio_input_path, audio_input_path)
+ audio = load_audio_infer(
+ audio_input_path,
+ 16000,
+ formant_shifting,
+ formant_qfrency,
+ formant_timbre,
+ )
+ audio_max = np.abs(audio).max() / 0.95
+
+ if audio_max > 1:
+ audio /= audio_max
+
+ if not self.hubert_model or embedder_model != self.last_embedder_model:
+ self.load_hubert(embedder_model, embedder_model_custom)
+ self.last_embedder_model = embedder_model
+
+ file_index = (
+ index_path.strip()
+ .strip('"')
+ .strip("\n")
+ .strip('"')
+ .strip()
+ .replace("trained", "added")
+ )
+
+            if resample_sr >= 16000 and self.tgt_sr != resample_sr:
+ self.tgt_sr = resample_sr
+
+ if split_audio:
+ result, new_dir_path = process_audio(audio_input_path)
+ if result == "Error":
+ return "Error with Split Audio", None
+
+ dir_path = (
+ new_dir_path.strip().strip('"').strip("\n").strip('"').strip()
+ )
+ if dir_path:
+ paths = [
+ os.path.join(root, name)
+ for root, _, files in os.walk(dir_path, topdown=False)
+ for name in files
+ if name.endswith(".wav") and root == dir_path
+ ]
+ try:
+ for path in paths:
+ self.convert_audio(
+ audio_input_path=path,
+ audio_output_path=path,
+ model_path=model_path,
+ index_path=index_path,
+ sid=sid,
+ pitch=pitch,
+ f0_file=None,
+ f0_method=f0_method,
+ index_rate=index_rate,
+ resample_sr=resample_sr,
+ volume_envelope=volume_envelope,
+ protect=protect,
+ hop_length=hop_length,
+ split_audio=False,
+ f0_autotune=f0_autotune,
+ filter_radius=filter_radius,
+ export_format=export_format,
+ upscale_audio=upscale_audio,
+ embedder_model=embedder_model,
+ embedder_model_custom=embedder_model_custom,
+ clean_audio=clean_audio,
+ clean_strength=clean_strength,
+ formant_shifting=formant_shifting,
+ formant_qfrency=formant_qfrency,
+ formant_timbre=formant_timbre,
+ post_process=post_process,
+ reverb=reverb,
+ pitch_shift=pitch_shift,
+ limiter=limiter,
+ gain=gain,
+ distortion=distortion,
+ chorus=chorus,
+ bitcrush=bitcrush,
+ clipping=clipping,
+ compressor=compressor,
+ delay=delay,
+ sliders=sliders,
+ )
+ except Exception as error:
+ print(f"An error occurred processing the segmented audio: {error}")
+ print(traceback.format_exc())
+ return f"Error {error}"
+ print("Finished processing segmented audio, now merging audio...")
+ merge_timestamps_file = os.path.join(
+ os.path.dirname(new_dir_path),
+ f"{os.path.basename(audio_input_path).split('.')[0]}_timestamps.txt",
+ )
+ self.tgt_sr, audio_opt = merge_audio(merge_timestamps_file)
+ os.remove(merge_timestamps_file)
+ if post_process:
+ audio_opt = self.post_process_audio(
+ audio_input=audio_opt,
+ sample_rate=self.tgt_sr,
+ reverb=reverb,
+ reverb_room_size=sliders[0],
+ reverb_damping=sliders[1],
+ reverb_wet_level=sliders[2],
+ reverb_dry_level=sliders[3],
+ reverb_width=sliders[4],
+ reverb_freeze_mode=sliders[5],
+ pitch_shift=pitch_shift,
+ pitch_shift_semitones=sliders[6],
+ limiter=limiter,
+ limiter_threshold=sliders[7],
+ limiter_release=sliders[8],
+ gain=gain,
+ gain_db=sliders[9],
+ distortion=distortion,
+ distortion_gain=sliders[10],
+ chorus=chorus,
+ chorus_rate=sliders[11],
+ chorus_depth=sliders[12],
+ chorus_delay=sliders[13],
+ chorus_feedback=sliders[14],
+ chorus_mix=sliders[15],
+ bitcrush=bitcrush,
+ bitcrush_bit_depth=sliders[16],
+ clipping=clipping,
+ clipping_threshold=sliders[17],
+ compressor=compressor,
+ compressor_threshold=sliders[18],
+ compressor_ratio=sliders[19],
+ compressor_attack=sliders[20],
+ compressor_release=sliders[21],
+ delay=delay,
+ delay_seconds=sliders[22],
+ delay_feedback=sliders[23],
+ delay_mix=sliders[24],
+ audio_output_path=audio_output_path,
+ )
+ sf.write(audio_output_path, audio_opt, self.tgt_sr, format="WAV")
+ else:
+ audio_opt = self.vc.pipeline(
+ model=self.hubert_model,
+ net_g=self.net_g,
+ sid=sid,
+ audio=audio,
+ input_audio_path=audio_input_path,
+ pitch=pitch,
+ f0_method=f0_method,
+ file_index=file_index,
+ index_rate=index_rate,
+ pitch_guidance=self.use_f0,
+ filter_radius=filter_radius,
+ tgt_sr=self.tgt_sr,
+ resample_sr=resample_sr,
+ volume_envelope=volume_envelope,
+ version=self.version,
+ protect=protect,
+ hop_length=hop_length,
+ f0_autotune=f0_autotune,
+ f0_file=f0_file,
+ )
+
+ if audio_output_path:
+ sf.write(audio_output_path, audio_opt, self.tgt_sr, format="WAV")
+
+ if clean_audio:
+ cleaned_audio = self.remove_audio_noise(
+ audio_output_path, clean_strength
+ )
+ if cleaned_audio is not None:
+ sf.write(
+ audio_output_path, cleaned_audio, self.tgt_sr, format="WAV"
+ )
+ if post_process:
+ audio_output_path = self.post_process_audio(
+ audio_input=audio_output_path,
+ sample_rate=self.tgt_sr,
+ reverb=reverb,
+ reverb_room_size=sliders["reverb_room_size"],
+ reverb_damping=sliders["reverb_damping"],
+ reverb_wet_level=sliders["reverb_wet_level"],
+ reverb_dry_level=sliders["reverb_dry_level"],
+ reverb_width=sliders["reverb_width"],
+ reverb_freeze_mode=sliders["reverb_freeze_mode"],
+ pitch_shift=pitch_shift,
+ pitch_shift_semitones=sliders["pitch_shift_semitones"],
+ limiter=limiter,
+ limiter_threshold=sliders["limiter_threshold"],
+ limiter_release=sliders["limiter_release"],
+ gain=gain,
+ gain_db=sliders["gain_db"],
+ distortion=distortion,
+ distortion_gain=sliders["distortion_gain"],
+ chorus=chorus,
+ chorus_rate=sliders["chorus_rate"],
+ chorus_depth=sliders["chorus_depth"],
+ chorus_delay=sliders["chorus_delay"],
+ chorus_feedback=sliders["chorus_feedback"],
+ chorus_mix=sliders["chorus_mix"],
+ bitcrush=bitcrush,
+ bitcrush_bit_depth=sliders["bitcrush_bit_depth"],
+ clipping=clipping,
+ clipping_threshold=sliders["clipping_threshold"],
+ compressor=compressor,
+ compressor_threshold=sliders["compressor_threshold"],
+ compressor_ratio=sliders["compressor_ratio"],
+ compressor_attack=sliders["compressor_attack"],
+ compressor_release=sliders["compressor_release"],
+ delay=delay,
+ delay_seconds=sliders["delay_seconds"],
+ delay_feedback=sliders["delay_feedback"],
+ delay_mix=sliders["delay_mix"],
+ audio_output_path=audio_output_path,
+ )
+ output_path_format = audio_output_path.replace(
+ ".wav", f".{export_format.lower()}"
+ )
+ audio_output_path = self.convert_audio_format(
+ audio_output_path, output_path_format, export_format
+ )
+
+ elapsed_time = time.time() - start_time
+ print(
+ f"Conversion completed at '{audio_output_path}' in {elapsed_time:.2f} seconds."
+ )
+
+ except Exception as error:
+ print(f"An error occurred during audio conversion: {error}")
+ print(traceback.format_exc())
+
+ def convert_audio_batch(
+ self,
+ audio_input_paths: str,
+ audio_output_path: str,
+ model_path: str,
+ index_path: str,
+ embedder_model: str,
+ pitch: int,
+ f0_file: str,
+ f0_method: str,
+ index_rate: float,
+ volume_envelope: int,
+ protect: float,
+ hop_length: int,
+ split_audio: bool,
+ f0_autotune: bool,
+ filter_radius: int,
+ embedder_model_custom: str,
+ clean_audio: bool,
+ clean_strength: float,
+ export_format: str,
+ upscale_audio: bool,
+ formant_shifting: bool,
+ formant_qfrency: float,
+ formant_timbre: float,
+ resample_sr: int = 0,
+ sid: int = 0,
+ pid_file_path: str = None,
+ post_process: bool = False,
+ reverb: bool = False,
+ pitch_shift: bool = False,
+ limiter: bool = False,
+ gain: bool = False,
+ distortion: bool = False,
+ chorus: bool = False,
+ bitcrush: bool = False,
+ clipping: bool = False,
+ compressor: bool = False,
+ delay: bool = False,
+ sliders: dict = None,
+ ):
+ """
+ Performs voice conversion on a batch of input audio files.
+
+ Args:
+            audio_input_paths (str): Path to a directory containing the input audio files.
+            audio_output_path (str): Path to the directory where converted audio is written.
+ model_path (str): Path to the voice conversion model.
+ index_path (str): Path to the index file.
+ sid (int, optional): Speaker ID. Default is 0.
+            pitch (int, optional): Pitch shift in semitones. Default is None.
+ f0_file (str, optional): Path to the F0 file. Default is None.
+ f0_method (str, optional): Method for F0 extraction. Default is None.
+ index_rate (float, optional): Rate for index matching. Default is None.
+ resample_sr (int, optional): Resample sampling rate. Default is 0.
+ volume_envelope (float, optional): RMS mix rate. Default is None.
+ protect (float, optional): Protection rate for certain audio segments. Default is None.
+ hop_length (int, optional): Hop length for audio processing. Default is None.
+ split_audio (bool, optional): Whether to split the audio for processing. Default is False.
+ f0_autotune (bool, optional): Whether to use F0 autotune. Default is False.
+ filter_radius (int, optional): Radius for filtering. Default is None.
+ embedder_model (str, optional): Path to the embedder model. Default is None.
+ embedder_model_custom (str, optional): Path to the custom embedder model. Default is None.
+ clean_audio (bool, optional): Whether to clean the audio. Default is False.
+ clean_strength (float, optional): Strength of the audio cleaning. Default is 0.7.
+ export_format (str, optional): Format for exporting the audio. Default is "WAV".
+ upscale_audio (bool, optional): Whether to upscale the audio. Default is False.
+            formant_shifting (bool, optional): Whether to shift the formants. Default is False.
+ formant_qfrency (float, optional): Formant frequency. Default is 1.0.
+ formant_timbre (float, optional): Formant timbre. Default is 1.0.
+ pid_file_path (str, optional): Path to the PID file. Default is None.
+ post_process (bool, optional): Whether to apply post-processing effects. Default is False.
+ reverb (bool, optional): Whether to apply reverb. Default is False.
+ pitch_shift (bool, optional): Whether to apply pitch shift. Default is False.
+ limiter (bool, optional): Whether to apply a limiter. Default is False.
+ gain (bool, optional): Whether to apply gain. Default is False.
+ distortion (bool, optional): Whether to apply distortion. Default is False.
+ chorus (bool, optional): Whether to apply chorus. Default is False.
+ bitcrush (bool, optional): Whether to apply bitcrush. Default is False.
+ clipping (bool, optional): Whether to apply clipping. Default is False.
+ compressor (bool, optional): Whether to apply a compressor. Default is False.
+ delay (bool, optional): Whether to apply delay. Default is False.
+ sliders (dict, optional): Dictionary of effect parameters. Default is None.
+
+ """
+        pid = os.getpid()
+        if pid_file_path is not None:
+            with open(pid_file_path, "w") as pid_file:
+                pid_file.write(str(pid))
+ try:
+ if not self.hubert_model or embedder_model != self.last_embedder_model:
+ self.load_hubert(embedder_model, embedder_model_custom)
+ self.last_embedder_model = embedder_model
+ self.get_vc(model_path, sid)
+ file_index = (
+ index_path.strip()
+ .strip('"')
+ .strip("\n")
+ .strip('"')
+ .strip()
+ .replace("trained", "added")
+ )
+ start_time = time.time()
+ print(f"Converting audio batch '{audio_input_paths}'...")
+ audio_files = [
+ f
+ for f in os.listdir(audio_input_paths)
+ if f.endswith((".mp3", ".wav", ".flac", ".m4a", ".ogg", ".opus"))
+ ]
+ print(f"Detected {len(audio_files)} audio files for inference.")
+ for i, audio_input_path in enumerate(audio_files):
+ audio_output_paths = os.path.join(
+ audio_output_path,
+ f"{os.path.splitext(os.path.basename(audio_input_path))[0]}_output.{export_format.lower()}",
+ )
+ if os.path.exists(audio_output_paths):
+ continue
+ print(f"Converting audio '{audio_input_path}'...")
+ audio_input_path = os.path.join(audio_input_paths, audio_input_path)
+
+            if upscale_audio:
+ upscale(audio_input_path, audio_input_path)
+ audio = load_audio_infer(
+ audio_input_path,
+ 16000,
+ formant_shifting,
+ formant_qfrency,
+ formant_timbre,
+ )
+ audio_max = np.abs(audio).max() / 0.95
+
+ if audio_max > 1:
+ audio /= audio_max
+
+            if resample_sr >= 16000 and self.tgt_sr != resample_sr:
+ self.tgt_sr = resample_sr
+
+ if split_audio:
+ result, new_dir_path = process_audio(audio_input_path)
+ if result == "Error":
+ return "Error with Split Audio", None
+
+ dir_path = (
+ new_dir_path.strip().strip('"').strip("\n").strip('"').strip()
+ )
+ if dir_path:
+ paths = [
+ os.path.join(root, name)
+ for root, _, files in os.walk(dir_path, topdown=False)
+ for name in files
+ if name.endswith(".wav") and root == dir_path
+ ]
+ try:
+ for path in paths:
+ self.convert_audio(
+ audio_input_path=path,
+ audio_output_path=path,
+ model_path=model_path,
+ index_path=index_path,
+ sid=sid,
+ pitch=pitch,
+ f0_file=None,
+ f0_method=f0_method,
+ index_rate=index_rate,
+ resample_sr=resample_sr,
+ volume_envelope=volume_envelope,
+ protect=protect,
+ hop_length=hop_length,
+ split_audio=False,
+ f0_autotune=f0_autotune,
+ filter_radius=filter_radius,
+ export_format=export_format,
+ upscale_audio=upscale_audio,
+ embedder_model=embedder_model,
+ embedder_model_custom=embedder_model_custom,
+ clean_audio=clean_audio,
+ clean_strength=clean_strength,
+ formant_shifting=formant_shifting,
+ formant_qfrency=formant_qfrency,
+ formant_timbre=formant_timbre,
+ post_process=post_process,
+ reverb=reverb,
+ pitch_shift=pitch_shift,
+ limiter=limiter,
+ gain=gain,
+ distortion=distortion,
+ chorus=chorus,
+ bitcrush=bitcrush,
+ clipping=clipping,
+ compressor=compressor,
+ delay=delay,
+ sliders=sliders,
+ )
+ except Exception as error:
+ print(
+ f"An error occurred processing the segmented audio: {error}"
+ )
+ print(traceback.format_exc())
+ return f"Error {error}"
+ print("Finished processing segmented audio, now merging audio...")
+ merge_timestamps_file = os.path.join(
+ os.path.dirname(new_dir_path),
+ f"{os.path.basename(audio_input_path).split('.')[0]}_timestamps.txt",
+ )
+ self.tgt_sr, audio_opt = merge_audio(merge_timestamps_file)
+ os.remove(merge_timestamps_file)
+ if post_process:
+ audio_opt = self.post_process_audio(
+ audio_input=audio_opt,
+ sample_rate=self.tgt_sr,
+ reverb=reverb,
+ reverb_room_size=sliders[0],
+ reverb_damping=sliders[1],
+ reverb_wet_level=sliders[2],
+ reverb_dry_level=sliders[3],
+ reverb_width=sliders[4],
+ reverb_freeze_mode=sliders[5],
+ pitch_shift=pitch_shift,
+ pitch_shift_semitones=sliders[6],
+ limiter=limiter,
+ limiter_threshold=sliders[7],
+ limiter_release=sliders[8],
+ gain=gain,
+ gain_db=sliders[9],
+ distortion=distortion,
+ distortion_gain=sliders[10],
+ chorus=chorus,
+ chorus_rate=sliders[11],
+ chorus_depth=sliders[12],
+ chorus_delay=sliders[13],
+ chorus_feedback=sliders[14],
+ chorus_mix=sliders[15],
+ bitcrush=bitcrush,
+ bitcrush_bit_depth=sliders[16],
+ clipping=clipping,
+ clipping_threshold=sliders[17],
+ compressor=compressor,
+ compressor_threshold=sliders[18],
+ compressor_ratio=sliders[19],
+ compressor_attack=sliders[20],
+ compressor_release=sliders[21],
+ delay=delay,
+ delay_seconds=sliders[22],
+ delay_feedback=sliders[23],
+ delay_mix=sliders[24],
+ audio_output_path=audio_output_paths,
+ )
+ sf.write(
+ audio_output_paths, audio_opt, self.tgt_sr, format="WAV"
+ )
+ else:
+ audio_opt = self.vc.pipeline(
+ model=self.hubert_model,
+ net_g=self.net_g,
+ sid=sid,
+ audio=audio,
+ input_audio_path=audio_input_path,
+ pitch=pitch,
+ f0_method=f0_method,
+ file_index=file_index,
+ index_rate=index_rate,
+ pitch_guidance=self.use_f0,
+ filter_radius=filter_radius,
+ tgt_sr=self.tgt_sr,
+ resample_sr=resample_sr,
+ volume_envelope=volume_envelope,
+ version=self.version,
+ protect=protect,
+ hop_length=hop_length,
+ f0_autotune=f0_autotune,
+ f0_file=f0_file,
+ )
+
+ if audio_output_paths:
+ sf.write(audio_output_paths, audio_opt, self.tgt_sr, format="WAV")
+
+ if clean_audio:
+ cleaned_audio = self.remove_audio_noise(
+ audio_output_paths, clean_strength
+ )
+ if cleaned_audio is not None:
+ sf.write(
+ audio_output_paths, cleaned_audio, self.tgt_sr, format="WAV"
+ )
+ if post_process:
+ audio_output_paths = self.post_process_audio(
+ audio_input=audio_output_paths,
+ sample_rate=self.tgt_sr,
+ reverb=reverb,
+ reverb_room_size=sliders["reverb_room_size"],
+ reverb_damping=sliders["reverb_damping"],
+ reverb_wet_level=sliders["reverb_wet_level"],
+ reverb_dry_level=sliders["reverb_dry_level"],
+ reverb_width=sliders["reverb_width"],
+ reverb_freeze_mode=sliders["reverb_freeze_mode"],
+ pitch_shift=pitch_shift,
+ pitch_shift_semitones=sliders["pitch_shift_semitones"],
+ limiter=limiter,
+ limiter_threshold=sliders["limiter_threshold"],
+ limiter_release=sliders["limiter_release"],
+ gain=gain,
+ gain_db=sliders["gain_db"],
+ distortion=distortion,
+ distortion_gain=sliders["distortion_gain"],
+ chorus=chorus,
+ chorus_rate=sliders["chorus_rate"],
+ chorus_depth=sliders["chorus_depth"],
+ chorus_delay=sliders["chorus_delay"],
+ chorus_feedback=sliders["chorus_feedback"],
+ chorus_mix=sliders["chorus_mix"],
+ bitcrush=bitcrush,
+ bitcrush_bit_depth=sliders["bitcrush_bit_depth"],
+ clipping=clipping,
+ clipping_threshold=sliders["clipping_threshold"],
+ compressor=compressor,
+ compressor_threshold=sliders["compressor_threshold"],
+ compressor_ratio=sliders["compressor_ratio"],
+ compressor_attack=sliders["compressor_attack"],
+ compressor_release=sliders["compressor_release"],
+ delay=delay,
+ delay_seconds=sliders["delay_seconds"],
+ delay_feedback=sliders["delay_feedback"],
+ delay_mix=sliders["delay_mix"],
+ audio_output_path=audio_output_paths,
+ )
+ output_path_format = audio_output_paths.replace(
+ ".wav", f".{export_format.lower()}"
+ )
+ audio_output_paths = self.convert_audio_format(
+ audio_output_paths, output_path_format, export_format
+ )
+ print(f"Conversion completed at '{audio_output_paths}'.")
+ elapsed_time = time.time() - start_time
+ print(f"Batch conversion completed in {elapsed_time:.2f} seconds.")
+ os.remove(pid_file_path)
+ except Exception as error:
+ print(f"An error occurred during audio conversion: {error}")
+ print(traceback.format_exc())
+
+ def get_vc(self, weight_root, sid):
+ """
+ Loads the voice conversion model and sets up the pipeline.
+
+ Args:
+ weight_root (str): Path to the model weights.
+ sid (int): Speaker ID.
+ """
+ if sid == "" or sid == []:
+ self.cleanup_model()
+ if torch.cuda.is_available():
+ torch.cuda.empty_cache()
+
+ self.load_model(weight_root)
+
+ if self.cpt is not None:
+ self.setup_network()
+ self.setup_vc_instance()
+
+ def cleanup_model(self):
+ """
+ Cleans up the model and releases resources.
+ """
+ if self.hubert_model is not None:
+ del self.net_g, self.n_spk, self.vc, self.hubert_model, self.tgt_sr
+ self.hubert_model = self.net_g = self.n_spk = self.vc = self.tgt_sr = None
+ if torch.cuda.is_available():
+ torch.cuda.empty_cache()
+
+ del self.net_g, self.cpt
+ if torch.cuda.is_available():
+ torch.cuda.empty_cache()
+ self.cpt = None
+
+ def load_model(self, weight_root):
+ """
+ Loads the model weights from the specified path.
+
+ Args:
+ weight_root (str): Path to the model weights.
+ """
+ self.cpt = (
+ torch.load(weight_root, map_location="cpu")
+ if os.path.isfile(weight_root)
+ else None
+ )
+
+ def setup_network(self):
+ """
+ Sets up the network configuration based on the loaded checkpoint.
+ """
+ if self.cpt is not None:
+ self.tgt_sr = self.cpt["config"][-1]
+ self.cpt["config"][-3] = self.cpt["weight"]["emb_g.weight"].shape[0]
+ self.use_f0 = self.cpt.get("f0", 1)
+
+ self.version = self.cpt.get("version", "v1")
+ self.text_enc_hidden_dim = 768 if self.version == "v2" else 256
+ self.net_g = Synthesizer(
+ *self.cpt["config"],
+ use_f0=self.use_f0,
+ text_enc_hidden_dim=self.text_enc_hidden_dim,
+ is_half=self.config.is_half,
+ )
+ del self.net_g.enc_q
+ self.net_g.load_state_dict(self.cpt["weight"], strict=False)
+ self.net_g.eval().to(self.config.device)
+ self.net_g = (
+ self.net_g.half() if self.config.is_half else self.net_g.float()
+ )
+
+ def setup_vc_instance(self):
+ """
+ Sets up the voice conversion pipeline instance based on the target sampling rate and configuration.
+ """
+ if self.cpt is not None:
+ self.vc = VC(self.tgt_sr, self.config)
+ self.n_spk = self.cpt["config"][-3]
diff --git a/rvc/infer/pipeline.py b/rvc/infer/pipeline.py
new file mode 100644
index 0000000000000000000000000000000000000000..e3eae01ae1ee2b341ad072333f1434e46814f212
--- /dev/null
+++ b/rvc/infer/pipeline.py
@@ -0,0 +1,683 @@
+import os
+import gc
+import re
+import sys
+import torch
+import torch.nn.functional as F
+import torchcrepe
+import faiss
+import librosa
+import numpy as np
+from scipy import signal
+from torch import Tensor
+
+now_dir = os.getcwd()
+sys.path.append(now_dir)
+
+from rvc.lib.predictors.RMVPE import RMVPE0Predictor
+from rvc.lib.predictors.FCPE import FCPEF0Predictor
+
+import logging
+
+logging.getLogger("faiss").setLevel(logging.WARNING)
+
+# Constants for high-pass filter
+FILTER_ORDER = 5
+CUTOFF_FREQUENCY = 48 # Hz
+SAMPLE_RATE = 16000 # Hz
+bh, ah = signal.butter(
+ N=FILTER_ORDER, Wn=CUTOFF_FREQUENCY, btype="high", fs=SAMPLE_RATE
+)
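+# The coefficients above are applied zero-phase later via
+# signal.filtfilt(bh, ah, audio) in Pipeline.pipeline, removing sub-48 Hz
+# rumble without introducing phase distortion.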
+
+input_audio_path2wav = {}
+
+
+class AudioProcessor:
+ """
+ A class for processing audio signals, specifically for adjusting RMS levels.
+ """
+
+    @staticmethod
+    def change_rms(
+ source_audio: np.ndarray,
+ source_rate: int,
+ target_audio: np.ndarray,
+ target_rate: int,
+ rate: float,
+ ) -> np.ndarray:
+ """
+ Adjust the RMS level of target_audio to match the RMS of source_audio, with a given blending rate.
+
+ Args:
+ source_audio: The source audio signal as a NumPy array.
+ source_rate: The sampling rate of the source audio.
+ target_audio: The target audio signal to adjust.
+ target_rate: The sampling rate of the target audio.
+ rate: The blending rate between the source and target RMS levels.
+ """
+ # Calculate RMS of both audio data
+ rms1 = librosa.feature.rms(
+ y=source_audio,
+ frame_length=source_rate // 2 * 2,
+ hop_length=source_rate // 2,
+ )
+ rms2 = librosa.feature.rms(
+ y=target_audio,
+ frame_length=target_rate // 2 * 2,
+ hop_length=target_rate // 2,
+ )
+
+ # Interpolate RMS to match target audio length
+ rms1 = F.interpolate(
+ torch.from_numpy(rms1).float().unsqueeze(0),
+ size=target_audio.shape[0],
+ mode="linear",
+ ).squeeze()
+ rms2 = F.interpolate(
+ torch.from_numpy(rms2).float().unsqueeze(0),
+ size=target_audio.shape[0],
+ mode="linear",
+ ).squeeze()
+ rms2 = torch.maximum(rms2, torch.zeros_like(rms2) + 1e-6)
+
+ # Adjust target audio RMS based on the source audio RMS
+ adjusted_audio = (
+ target_audio
+ * (torch.pow(rms1, 1 - rate) * torch.pow(rms2, rate - 1)).numpy()
+ )
+ return adjusted_audio
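+
+    # A minimal usage sketch (hypothetical arrays): rate=0.0 fully imposes the
+    # source envelope on the target, while rate=1.0 leaves the target's own
+    # envelope untouched:
+    #
+    #   src = np.random.randn(16000).astype(np.float32)  # 1 s at 16 kHz
+    #   out = np.random.randn(40000).astype(np.float32)  # 1 s at 40 kHz
+    #   blended = AudioProcessor.change_rms(src, 16000, out, 40000, rate=0.5)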
+
+
+class Autotune:
+ """
+ A class for applying autotune to a given fundamental frequency (F0) contour.
+ """
+
+ def __init__(self, ref_freqs):
+ """
+ Initializes the Autotune class with a set of reference frequencies.
+
+ Args:
+ ref_freqs: A list of reference frequencies representing musical notes.
+ """
+ self.ref_freqs = ref_freqs
+ self.note_dict = self.generate_interpolated_frequencies()
+
+ def generate_interpolated_frequencies(self):
+ """
+        Generates a list of interpolated frequencies between consecutive reference frequencies.
+ """
+ note_dict = []
+ for i in range(len(self.ref_freqs) - 1):
+ freq_low = self.ref_freqs[i]
+ freq_high = self.ref_freqs[i + 1]
+ interpolated_freqs = np.linspace(
+ freq_low, freq_high, num=10, endpoint=False
+ )
+ note_dict.extend(interpolated_freqs)
+ note_dict.append(self.ref_freqs[-1])
+ return note_dict
+
+ def autotune_f0(self, f0):
+ """
+ Autotunes a given F0 contour by snapping each frequency to the closest reference frequency.
+
+ Args:
+ f0: The input F0 contour as a NumPy array.
+ """
+ autotuned_f0 = np.zeros_like(f0)
+ for i, freq in enumerate(f0):
+ closest_note = min(self.note_dict, key=lambda x: abs(x - freq))
+ autotuned_f0[i] = closest_note
+ return autotuned_f0
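+
+    # Worked example (approximate, with the reference grid built in Pipeline):
+    # an input frame at 437.0 Hz snaps to 440.0 Hz, its nearest interpolated
+    # grid frequency, flattening small pitch wobble onto the note scale.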
+
+
+class Pipeline:
+ """
+ The main pipeline class for performing voice conversion, including preprocessing, F0 estimation,
+ voice conversion using a model, and post-processing.
+ """
+
+ def __init__(self, tgt_sr, config):
+ """
+ Initializes the Pipeline class with target sampling rate and configuration parameters.
+
+ Args:
+ tgt_sr: The target sampling rate for the output audio.
+ config: A configuration object containing various parameters for the pipeline.
+ """
+ self.x_pad = config.x_pad
+ self.x_query = config.x_query
+ self.x_center = config.x_center
+ self.x_max = config.x_max
+ self.is_half = config.is_half
+ self.sample_rate = 16000
+ self.window = 160
+ self.t_pad = self.sample_rate * self.x_pad
+ self.t_pad_tgt = tgt_sr * self.x_pad
+ self.t_pad2 = self.t_pad * 2
+ self.t_query = self.sample_rate * self.x_query
+ self.t_center = self.sample_rate * self.x_center
+ self.t_max = self.sample_rate * self.x_max
+ self.time_step = self.window / self.sample_rate * 1000
+ self.f0_min = 50
+ self.f0_max = 1100
+ self.f0_mel_min = 1127 * np.log(1 + self.f0_min / 700)
+ self.f0_mel_max = 1127 * np.log(1 + self.f0_max / 700)
+ self.device = config.device
+ self.ref_freqs = [
+ 65.41,
+ 82.41,
+ 110.00,
+ 146.83,
+ 196.00,
+ 246.94,
+ 329.63,
+ 440.00,
+ 587.33,
+ 783.99,
+ 1046.50,
+ ]
+ self.autotune = Autotune(self.ref_freqs)
+ self.note_dict = self.autotune.note_dict
+
+ def get_f0_crepe(
+ self,
+ x,
+ f0_min,
+ f0_max,
+ p_len,
+ hop_length,
+ model="full",
+ ):
+ """
+ Estimates the fundamental frequency (F0) of a given audio signal using the Crepe model.
+
+ Args:
+ x: The input audio signal as a NumPy array.
+ f0_min: Minimum F0 value to consider.
+ f0_max: Maximum F0 value to consider.
+ p_len: Desired length of the F0 output.
+ hop_length: Hop length for the Crepe model.
+ model: Crepe model size to use ("full" or "tiny").
+ """
+ x = x.astype(np.float32)
+ x /= np.quantile(np.abs(x), 0.999)
+ audio = torch.from_numpy(x).to(self.device, copy=True)
+ audio = torch.unsqueeze(audio, dim=0)
+ if audio.ndim == 2 and audio.shape[0] > 1:
+ audio = torch.mean(audio, dim=0, keepdim=True).detach()
+ audio = audio.detach()
+ pitch: Tensor = torchcrepe.predict(
+ audio,
+ self.sample_rate,
+ hop_length,
+ f0_min,
+ f0_max,
+ model,
+ batch_size=hop_length * 2,
+ device=self.device,
+ pad=True,
+ )
+ p_len = p_len or x.shape[0] // hop_length
+ source = np.array(pitch.squeeze(0).cpu().float().numpy())
+ source[source < 0.001] = np.nan
+ target = np.interp(
+ np.arange(0, len(source) * p_len, len(source)) / p_len,
+ np.arange(0, len(source)),
+ source,
+ )
+ f0 = np.nan_to_num(target)
+ return f0
+
+ def get_f0_hybrid(
+ self,
+ methods_str,
+ x,
+ f0_min,
+ f0_max,
+ p_len,
+ hop_length,
+ ):
+ """
+ Estimates the fundamental frequency (F0) using a hybrid approach combining multiple methods.
+
+ Args:
+ methods_str: A string specifying the methods to combine (e.g., "hybrid[crepe+rmvpe]").
+ x: The input audio signal as a NumPy array.
+ f0_min: Minimum F0 value to consider.
+ f0_max: Maximum F0 value to consider.
+ p_len: Desired length of the F0 output.
+ hop_length: Hop length for F0 estimation methods.
+ """
+ methods_str = re.search("hybrid\[(.+)\]", methods_str)
+ if methods_str:
+ methods = [method.strip() for method in methods_str.group(1).split("+")]
+ f0_computation_stack = []
+ print(f"Calculating f0 pitch estimations for methods {str(methods)}")
+ x = x.astype(np.float32)
+ x /= np.quantile(np.abs(x), 0.999)
+ for method in methods:
+ f0 = None
+ if method == "crepe":
+                    f0 = self.get_f0_crepe(
+ x, f0_min, f0_max, p_len, int(hop_length)
+ )
+ elif method == "rmvpe":
+ self.model_rmvpe = RMVPE0Predictor(
+ os.path.join("rvc", "models", "predictors", "rmvpe.pt"),
+ is_half=self.is_half,
+ device=self.device,
+ )
+ f0 = self.model_rmvpe.infer_from_audio(x, thred=0.03)
+ f0 = f0[1:]
+ elif method == "fcpe":
+ self.model_fcpe = FCPEF0Predictor(
+ os.path.join("rvc", "models", "predictors", "fcpe.pt"),
+ f0_min=int(f0_min),
+ f0_max=int(f0_max),
+ dtype=torch.float32,
+ device=self.device,
+ sample_rate=self.sample_rate,
+ threshold=0.03,
+ )
+ f0 = self.model_fcpe.compute_f0(x, p_len=p_len)
+ del self.model_fcpe
+ gc.collect()
+ f0_computation_stack.append(f0)
+
+ f0_computation_stack = [fc for fc in f0_computation_stack if fc is not None]
+ f0_median_hybrid = None
+ if len(f0_computation_stack) == 1:
+ f0_median_hybrid = f0_computation_stack[0]
+ else:
+ f0_median_hybrid = np.nanmedian(f0_computation_stack, axis=0)
+ return f0_median_hybrid
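+
+    # Usage note: methods_str is expected in the form "hybrid[crepe+rmvpe]".
+    # Each listed estimator runs independently and the per-frame contours are
+    # reduced with np.nanmedian, damping outlier frames from any one method.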
+
+ def get_f0(
+ self,
+ input_audio_path,
+ x,
+ p_len,
+ pitch,
+ f0_method,
+ filter_radius,
+ hop_length,
+ f0_autotune,
+ inp_f0=None,
+ ):
+ """
+ Estimates the fundamental frequency (F0) of a given audio signal using various methods.
+
+ Args:
+ input_audio_path: Path to the input audio file.
+ x: The input audio signal as a NumPy array.
+ p_len: Desired length of the F0 output.
+ pitch: Key to adjust the pitch of the F0 contour.
+ f0_method: Method to use for F0 estimation (e.g., "crepe").
+ filter_radius: Radius for median filtering the F0 contour.
+ hop_length: Hop length for F0 estimation methods.
+ f0_autotune: Whether to apply autotune to the F0 contour.
+ inp_f0: Optional input F0 contour to use instead of estimating.
+ """
+ global input_audio_path2wav
+ if f0_method == "crepe":
+ f0 = self.get_f0_crepe(x, self.f0_min, self.f0_max, p_len, int(hop_length))
+ elif f0_method == "crepe-tiny":
+ f0 = self.get_f0_crepe(
+ x, self.f0_min, self.f0_max, p_len, int(hop_length), "tiny"
+ )
+ elif f0_method == "rmvpe":
+ self.model_rmvpe = RMVPE0Predictor(
+ os.path.join("rvc", "models", "predictors", "rmvpe.pt"),
+ is_half=self.is_half,
+ device=self.device,
+ )
+ f0 = self.model_rmvpe.infer_from_audio(x, thred=0.03)
+ elif f0_method == "fcpe":
+ self.model_fcpe = FCPEF0Predictor(
+ os.path.join("rvc", "models", "predictors", "fcpe.pt"),
+ f0_min=int(self.f0_min),
+ f0_max=int(self.f0_max),
+ dtype=torch.float32,
+ device=self.device,
+ sample_rate=self.sample_rate,
+ threshold=0.03,
+ )
+ f0 = self.model_fcpe.compute_f0(x, p_len=p_len)
+ del self.model_fcpe
+ gc.collect()
+ elif "hybrid" in f0_method:
+ input_audio_path2wav[input_audio_path] = x.astype(np.double)
+ f0 = self.get_f0_hybrid(
+ f0_method,
+ x,
+ self.f0_min,
+ self.f0_max,
+ p_len,
+ hop_length,
+ )
+
+ if f0_autotune == "True":
+ f0 = Autotune.autotune_f0(self, f0)
+
+ f0 *= pow(2, pitch / 12)
+ tf0 = self.sample_rate // self.window
+ if inp_f0 is not None:
+ delta_t = np.round(
+ (inp_f0[:, 0].max() - inp_f0[:, 0].min()) * tf0 + 1
+ ).astype("int16")
+ replace_f0 = np.interp(
+ list(range(delta_t)), inp_f0[:, 0] * 100, inp_f0[:, 1]
+ )
+ shape = f0[self.x_pad * tf0 : self.x_pad * tf0 + len(replace_f0)].shape[0]
+ f0[self.x_pad * tf0 : self.x_pad * tf0 + len(replace_f0)] = replace_f0[
+ :shape
+ ]
+ f0bak = f0.copy()
+ f0_mel = 1127 * np.log(1 + f0 / 700)
+ f0_mel[f0_mel > 0] = (f0_mel[f0_mel > 0] - self.f0_mel_min) * 254 / (
+ self.f0_mel_max - self.f0_mel_min
+ ) + 1
+ f0_mel[f0_mel <= 1] = 1
+ f0_mel[f0_mel > 255] = 255
+        f0_coarse = np.rint(f0_mel).astype(np.int64)
+
+ return f0_coarse, f0bak
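+
+    # Rough worked example: with f0_min=50 and f0_max=1100, a 440 Hz frame maps
+    # to f0_mel ≈ 549.6, which quantizes to coarse bin
+    # rint((549.6 - 77.8) * 254 / (1064.4 - 77.8) + 1) ≈ 122 in the 1..255 range.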
+
+ def voice_conversion(
+ self,
+ model,
+ net_g,
+ sid,
+ audio0,
+ pitch,
+ pitchf,
+ index,
+ big_npy,
+ index_rate,
+ version,
+ protect,
+ ):
+ """
+ Performs voice conversion on a given audio segment.
+
+ Args:
+ model: The feature extractor model.
+ net_g: The generative model for synthesizing speech.
+ sid: Speaker ID for the target voice.
+ audio0: The input audio segment.
+ pitch: Quantized F0 contour for pitch guidance.
+ pitchf: Original F0 contour for pitch guidance.
+ index: FAISS index for speaker embedding retrieval.
+ big_npy: Speaker embeddings stored in a NumPy array.
+ index_rate: Blending rate for speaker embedding retrieval.
+ version: Model version ("v1" or "v2").
+ protect: Protection level for preserving the original pitch.
+ """
+ feats = torch.from_numpy(audio0)
+ if self.is_half:
+ feats = feats.half()
+ else:
+ feats = feats.float()
+ if feats.dim() == 2:
+ feats = feats.mean(-1)
+ assert feats.dim() == 1, feats.dim()
+ feats = feats.view(1, -1)
+ padding_mask = torch.BoolTensor(feats.shape).to(self.device).fill_(False)
+
+ with torch.no_grad():
+ feats = model(feats.to(self.device))["last_hidden_state"]
+ feats = (
+ model.final_proj(feats[0]).unsqueeze(0) if version == "v1" else feats
+ )
+            if protect < 0.5 and pitch is not None and pitchf is not None:
+ feats0 = feats.clone()
+            if index is not None and big_npy is not None and index_rate != 0:
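+                # Retrieval blend: look up the k=8 nearest index entries per
+                # frame, weight them by inverse squared distance (normalized to
+                # sum to 1), and mix the result into the features at index_rate.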
+ npy = feats[0].cpu().numpy()
+ if self.is_half:
+ npy = npy.astype("float32")
+
+ score, ix = index.search(npy, k=8)
+ weight = np.square(1 / score)
+ weight /= weight.sum(axis=1, keepdims=True)
+ npy = np.sum(big_npy[ix] * np.expand_dims(weight, axis=2), axis=1)
+
+ if self.is_half:
+ npy = npy.astype("float16")
+ feats = (
+ torch.from_numpy(npy).unsqueeze(0).to(self.device) * index_rate
+ + (1 - index_rate) * feats
+ )
+
+ feats = F.interpolate(feats.permute(0, 2, 1), scale_factor=2).permute(0, 2, 1)
+        if protect < 0.5 and pitch is not None and pitchf is not None:
+ feats0 = F.interpolate(feats0.permute(0, 2, 1), scale_factor=2).permute(
+ 0, 2, 1
+ )
+ p_len = audio0.shape[0] // self.window
+ if feats.shape[1] < p_len:
+ p_len = feats.shape[1]
+        if pitch is not None and pitchf is not None:
+ pitch = pitch[:, :p_len]
+ pitchf = pitchf[:, :p_len]
+
+        if protect < 0.5 and pitch is not None and pitchf is not None:
+ pitchff = pitchf.clone()
+ pitchff[pitchf > 0] = 1
+ pitchff[pitchf < 1] = protect
+ pitchff = pitchff.unsqueeze(-1)
+ feats = feats * pitchff + feats0 * (1 - pitchff)
+ feats = feats.to(feats0.dtype)
+ p_len = torch.tensor([p_len], device=self.device).long()
+ with torch.no_grad():
+            if pitch is not None and pitchf is not None:
+ audio1 = (
+ (net_g.infer(feats, p_len, pitch, pitchf, sid)[0][0, 0])
+ .data.cpu()
+ .float()
+ .numpy()
+ )
+ else:
+ audio1 = (
+ (net_g.infer(feats, p_len, sid)[0][0, 0]).data.cpu().float().numpy()
+ )
+ del feats, p_len, padding_mask
+ if torch.cuda.is_available():
+ torch.cuda.empty_cache()
+ return audio1
+
+ def pipeline(
+ self,
+ model,
+ net_g,
+ sid,
+ audio,
+ input_audio_path,
+ pitch,
+ f0_method,
+ file_index,
+ index_rate,
+ pitch_guidance,
+ filter_radius,
+ tgt_sr,
+ resample_sr,
+ volume_envelope,
+ version,
+ protect,
+ hop_length,
+ f0_autotune,
+ f0_file,
+ ):
+ """
+ The main pipeline function for performing voice conversion.
+
+ Args:
+ model: The feature extractor model.
+ net_g: The generative model for synthesizing speech.
+ sid: Speaker ID for the target voice.
+ audio: The input audio signal.
+ input_audio_path: Path to the input audio file.
+ pitch: Key to adjust the pitch of the F0 contour.
+ f0_method: Method to use for F0 estimation.
+ file_index: Path to the FAISS index file for speaker embedding retrieval.
+ index_rate: Blending rate for speaker embedding retrieval.
+ pitch_guidance: Whether to use pitch guidance during voice conversion.
+ filter_radius: Radius for median filtering the F0 contour.
+ tgt_sr: Target sampling rate for the output audio.
+ resample_sr: Resampling rate for the output audio.
+ volume_envelope: Blending rate for adjusting the RMS level of the output audio.
+ version: Model version.
+ protect: Protection level for preserving the original pitch.
+ hop_length: Hop length for F0 estimation methods.
+ f0_autotune: Whether to apply autotune to the F0 contour.
+ f0_file: Path to a file containing an F0 contour to use.
+ """
+ if file_index != "" and os.path.exists(file_index) == True and index_rate != 0:
+ try:
+ index = faiss.read_index(file_index)
+ big_npy = index.reconstruct_n(0, index.ntotal)
+ except Exception as error:
+ print(f"An error occurred reading the FAISS index: {error}")
+ index = big_npy = None
+ else:
+ index = big_npy = None
+ audio = signal.filtfilt(bh, ah, audio)
+ audio_pad = np.pad(audio, (self.window // 2, self.window // 2), mode="reflect")
+ opt_ts = []
+ if audio_pad.shape[0] > self.t_max:
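+            # Pick segment cut points for long inputs: near each multiple of
+            # t_center, shift the cut to the quietest sample (minimum
+            # |audio_sum|) within ±t_query so boundaries land in low-energy
+            # regions.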
+ audio_sum = np.zeros_like(audio)
+ for i in range(self.window):
+ audio_sum += audio_pad[i : i - self.window]
+ for t in range(self.t_center, audio.shape[0], self.t_center):
+ opt_ts.append(
+ t
+ - self.t_query
+ + np.where(
+ np.abs(audio_sum[t - self.t_query : t + self.t_query])
+ == np.abs(audio_sum[t - self.t_query : t + self.t_query]).min()
+ )[0][0]
+ )
+ s = 0
+ audio_opt = []
+ t = None
+ audio_pad = np.pad(audio, (self.t_pad, self.t_pad), mode="reflect")
+ p_len = audio_pad.shape[0] // self.window
+ inp_f0 = None
+ if hasattr(f0_file, "name") == True:
+ try:
+ with open(f0_file.name, "r") as f:
+ lines = f.read().strip("\n").split("\n")
+ inp_f0 = []
+ for line in lines:
+ inp_f0.append([float(i) for i in line.split(",")])
+ inp_f0 = np.array(inp_f0, dtype="float32")
+ except Exception as error:
+ print(f"An error occurred reading the F0 file: {error}")
+ sid = torch.tensor(sid, device=self.device).unsqueeze(0).long()
+        if pitch_guidance:
+ pitch, pitchf = self.get_f0(
+ input_audio_path,
+ audio_pad,
+ p_len,
+ pitch,
+ f0_method,
+ filter_radius,
+ hop_length,
+ f0_autotune,
+ inp_f0,
+ )
+ pitch = pitch[:p_len]
+ pitchf = pitchf[:p_len]
+ if self.device == "mps":
+ pitchf = pitchf.astype(np.float32)
+ pitch = torch.tensor(pitch, device=self.device).unsqueeze(0).long()
+ pitchf = torch.tensor(pitchf, device=self.device).unsqueeze(0).float()
+ for t in opt_ts:
+ t = t // self.window * self.window
+            if pitch_guidance:
+ audio_opt.append(
+ self.voice_conversion(
+ model,
+ net_g,
+ sid,
+ audio_pad[s : t + self.t_pad2 + self.window],
+ pitch[:, s // self.window : (t + self.t_pad2) // self.window],
+ pitchf[:, s // self.window : (t + self.t_pad2) // self.window],
+ index,
+ big_npy,
+ index_rate,
+ version,
+ protect,
+ )[self.t_pad_tgt : -self.t_pad_tgt]
+ )
+ else:
+ audio_opt.append(
+ self.voice_conversion(
+ model,
+ net_g,
+ sid,
+ audio_pad[s : t + self.t_pad2 + self.window],
+ None,
+ None,
+ index,
+ big_npy,
+ index_rate,
+ version,
+ protect,
+ )[self.t_pad_tgt : -self.t_pad_tgt]
+ )
+ s = t
+        if pitch_guidance:
+ audio_opt.append(
+ self.voice_conversion(
+ model,
+ net_g,
+ sid,
+ audio_pad[t:],
+ pitch[:, t // self.window :] if t is not None else pitch,
+ pitchf[:, t // self.window :] if t is not None else pitchf,
+ index,
+ big_npy,
+ index_rate,
+ version,
+ protect,
+ )[self.t_pad_tgt : -self.t_pad_tgt]
+ )
+ else:
+ audio_opt.append(
+ self.voice_conversion(
+ model,
+ net_g,
+ sid,
+ audio_pad[t:],
+ None,
+ None,
+ index,
+ big_npy,
+ index_rate,
+ version,
+ protect,
+ )[self.t_pad_tgt : -self.t_pad_tgt]
+ )
+ audio_opt = np.concatenate(audio_opt)
+ if volume_envelope != 1:
+ audio_opt = AudioProcessor.change_rms(
+ audio, self.sample_rate, audio_opt, tgt_sr, volume_envelope
+ )
+ if resample_sr >= self.sample_rate and tgt_sr != resample_sr:
+ audio_opt = librosa.resample(
+ audio_opt, orig_sr=tgt_sr, target_sr=resample_sr
+ )
+ audio_max = np.abs(audio_opt).max() / 0.99
+ max_int16 = 32768
+ if audio_max > 1:
+ max_int16 /= audio_max
+ audio_opt = (audio_opt * max_int16).astype(np.int16)
+        if pitch_guidance:
+            del pitch, pitchf
+        del sid
+ if torch.cuda.is_available():
+ torch.cuda.empty_cache()
+ return audio_opt
diff --git a/rvc/lib/algorithm/__init__.py b/rvc/lib/algorithm/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/rvc/lib/algorithm/attentions.py b/rvc/lib/algorithm/attentions.py
new file mode 100644
index 0000000000000000000000000000000000000000..c4f47f8b9abfdf3e38521bda0b449e00dee553fe
--- /dev/null
+++ b/rvc/lib/algorithm/attentions.py
@@ -0,0 +1,292 @@
+import math
+import torch
+
+from rvc.lib.algorithm.commons import convert_pad_shape
+
+
+class MultiHeadAttention(torch.nn.Module):
+ """
+ Multi-head attention module with optional relative positional encoding and proximal bias.
+
+ Args:
+ channels (int): Number of input channels.
+ out_channels (int): Number of output channels.
+ n_heads (int): Number of attention heads.
+ p_dropout (float, optional): Dropout probability. Defaults to 0.0.
+ window_size (int, optional): Window size for relative positional encoding. Defaults to None.
+ heads_share (bool, optional): Whether to share relative positional embeddings across heads. Defaults to True.
+ block_length (int, optional): Block length for local attention. Defaults to None.
+ proximal_bias (bool, optional): Whether to use proximal bias in self-attention. Defaults to False.
+ proximal_init (bool, optional): Whether to initialize the key projection weights the same as query projection weights. Defaults to False.
+ """
+
+ def __init__(
+ self,
+ channels,
+ out_channels,
+ n_heads,
+ p_dropout=0.0,
+ window_size=None,
+ heads_share=True,
+ block_length=None,
+ proximal_bias=False,
+ proximal_init=False,
+ ):
+ super().__init__()
+ assert channels % n_heads == 0
+
+ self.channels = channels
+ self.out_channels = out_channels
+ self.n_heads = n_heads
+ self.p_dropout = p_dropout
+ self.window_size = window_size
+ self.heads_share = heads_share
+ self.block_length = block_length
+ self.proximal_bias = proximal_bias
+ self.proximal_init = proximal_init
+ self.attn = None
+
+ self.k_channels = channels // n_heads
+ self.conv_q = torch.nn.Conv1d(channels, channels, 1)
+ self.conv_k = torch.nn.Conv1d(channels, channels, 1)
+ self.conv_v = torch.nn.Conv1d(channels, channels, 1)
+ self.conv_o = torch.nn.Conv1d(channels, out_channels, 1)
+ self.drop = torch.nn.Dropout(p_dropout)
+
+ if window_size is not None:
+ n_heads_rel = 1 if heads_share else n_heads
+ rel_stddev = self.k_channels**-0.5
+ self.emb_rel_k = torch.nn.Parameter(
+ torch.randn(n_heads_rel, window_size * 2 + 1, self.k_channels)
+ * rel_stddev
+ )
+ self.emb_rel_v = torch.nn.Parameter(
+ torch.randn(n_heads_rel, window_size * 2 + 1, self.k_channels)
+ * rel_stddev
+ )
+
+ torch.nn.init.xavier_uniform_(self.conv_q.weight)
+ torch.nn.init.xavier_uniform_(self.conv_k.weight)
+ torch.nn.init.xavier_uniform_(self.conv_v.weight)
+ if proximal_init:
+ with torch.no_grad():
+ self.conv_k.weight.copy_(self.conv_q.weight)
+ self.conv_k.bias.copy_(self.conv_q.bias)
+
+ def forward(self, x, c, attn_mask=None):
+ q = self.conv_q(x)
+ k = self.conv_k(c)
+ v = self.conv_v(c)
+
+ x, self.attn = self.attention(q, k, v, mask=attn_mask)
+
+ x = self.conv_o(x)
+ return x
+
+ def attention(self, query, key, value, mask=None):
+ # reshape [b, d, t] -> [b, n_h, t, d_k]
+ b, d, t_s, t_t = (*key.size(), query.size(2))
+ query = query.view(b, self.n_heads, self.k_channels, t_t).transpose(2, 3)
+ key = key.view(b, self.n_heads, self.k_channels, t_s).transpose(2, 3)
+ value = value.view(b, self.n_heads, self.k_channels, t_s).transpose(2, 3)
+
+ scores = torch.matmul(query / math.sqrt(self.k_channels), key.transpose(-2, -1))
+ if self.window_size is not None:
+ assert (
+ t_s == t_t
+ ), "Relative attention is only available for self-attention."
+ key_relative_embeddings = self._get_relative_embeddings(self.emb_rel_k, t_s)
+ rel_logits = self._matmul_with_relative_keys(
+ query / math.sqrt(self.k_channels), key_relative_embeddings
+ )
+ scores_local = self._relative_position_to_absolute_position(rel_logits)
+ scores = scores + scores_local
+ if self.proximal_bias:
+ assert t_s == t_t, "Proximal bias is only available for self-attention."
+ scores = scores + self._attention_bias_proximal(t_s).to(
+ device=scores.device, dtype=scores.dtype
+ )
+ if mask is not None:
+ scores = scores.masked_fill(mask == 0, -1e4)
+ if self.block_length is not None:
+ assert (
+ t_s == t_t
+ ), "Local attention is only available for self-attention."
+ block_mask = (
+ torch.ones_like(scores)
+ .triu(-self.block_length)
+ .tril(self.block_length)
+ )
+ scores = scores.masked_fill(block_mask == 0, -1e4)
+ p_attn = torch.nn.functional.softmax(scores, dim=-1) # [b, n_h, t_t, t_s]
+ p_attn = self.drop(p_attn)
+ output = torch.matmul(p_attn, value)
+ if self.window_size is not None:
+ relative_weights = self._absolute_position_to_relative_position(p_attn)
+ value_relative_embeddings = self._get_relative_embeddings(
+ self.emb_rel_v, t_s
+ )
+ output = output + self._matmul_with_relative_values(
+ relative_weights, value_relative_embeddings
+ )
+ output = (
+ output.transpose(2, 3).contiguous().view(b, d, t_t)
+ ) # [b, n_h, t_t, d_k] -> [b, d, t_t]
+ return output, p_attn
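+
+    # Shape walkthrough (illustrative numbers): for b=1, n_heads=2,
+    # k_channels=32 and t_t = t_s = 100, `scores` is [1, 2, 100, 100]; with
+    # window_size=10 the relative logits come from a [1, 21, 32] embedding
+    # table (2*10+1 relative offsets, shared across heads by default).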
+
+ def _matmul_with_relative_values(self, x, y):
+ """
+ x: [b, h, l, m]
+ y: [h or 1, m, d]
+ ret: [b, h, l, d]
+ """
+ ret = torch.matmul(x, y.unsqueeze(0))
+ return ret
+
+ def _matmul_with_relative_keys(self, x, y):
+ """
+ x: [b, h, l, d]
+ y: [h or 1, m, d]
+ ret: [b, h, l, m]
+ """
+ ret = torch.matmul(x, y.unsqueeze(0).transpose(-2, -1))
+ return ret
+
+ def _get_relative_embeddings(self, relative_embeddings, length):
+ # Pad first before slice to avoid using cond ops.
+ pad_length = max(length - (self.window_size + 1), 0)
+ slice_start_position = max((self.window_size + 1) - length, 0)
+ slice_end_position = slice_start_position + 2 * length - 1
+ if pad_length > 0:
+ padded_relative_embeddings = torch.nn.functional.pad(
+ relative_embeddings,
+ convert_pad_shape([[0, 0], [pad_length, pad_length], [0, 0]]),
+ )
+ else:
+ padded_relative_embeddings = relative_embeddings
+ used_relative_embeddings = padded_relative_embeddings[
+ :, slice_start_position:slice_end_position
+ ]
+ return used_relative_embeddings
+
+ def _relative_position_to_absolute_position(self, x):
+ """
+ x: [b, h, l, 2*l-1]
+ ret: [b, h, l, l]
+ """
+ batch, heads, length, _ = x.size()
+
+ # Concat columns of pad to shift from relative to absolute indexing.
+ x = torch.nn.functional.pad(
+ x, convert_pad_shape([[0, 0], [0, 0], [0, 0], [0, 1]])
+ )
+
+ # Concat extra elements so to add up to shape (len+1, 2*len-1).
+ x_flat = x.view([batch, heads, length * 2 * length])
+ x_flat = torch.nn.functional.pad(
+ x_flat, convert_pad_shape([[0, 0], [0, 0], [0, length - 1]])
+ )
+
+ # Reshape and slice out the padded elements.
+ x_final = x_flat.view([batch, heads, length + 1, 2 * length - 1])[
+ :, :, :length, length - 1 :
+ ]
+ return x_final
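+
+    # Tiny worked example (l=2): [b, h, 2, 3] -> pad to [b, h, 2, 4] -> flatten
+    # to [b, h, 8] -> pad to [b, h, 9] -> view as [b, h, 3, 3] -> slice to the
+    # absolute-position scores [b, h, 2, 2].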
+
+ def _absolute_position_to_relative_position(self, x):
+ """
+ x: [b, h, l, l]
+ ret: [b, h, l, 2*l-1]
+ """
+ batch, heads, length, _ = x.size()
+        # pad along columns
+ x = torch.nn.functional.pad(
+ x, convert_pad_shape([[0, 0], [0, 0], [0, 0], [0, length - 1]])
+ )
+ x_flat = x.view([batch, heads, length**2 + length * (length - 1)])
+        # add zeros at the beginning so the reshape shifts elements into relative positions
+ x_flat = torch.nn.functional.pad(
+ x_flat, convert_pad_shape([[0, 0], [0, 0], [length, 0]])
+ )
+ x_final = x_flat.view([batch, heads, length, 2 * length])[:, :, :, 1:]
+ return x_final
+
+ def _attention_bias_proximal(self, length):
+ """Bias for self-attention to encourage attention to close positions.
+ Args:
+ length: an integer scalar.
+ """
+ r = torch.arange(length, dtype=torch.float32)
+ diff = torch.unsqueeze(r, 0) - torch.unsqueeze(r, 1)
+ return torch.unsqueeze(torch.unsqueeze(-torch.log1p(torch.abs(diff)), 0), 0)
+
+
+class FFN(torch.nn.Module):
+ """
+ Feed-forward network module.
+
+ Args:
+ in_channels (int): Number of input channels.
+ out_channels (int): Number of output channels.
+ filter_channels (int): Number of filter channels in the convolution layers.
+ kernel_size (int): Kernel size of the convolution layers.
+ p_dropout (float, optional): Dropout probability. Defaults to 0.0.
+ activation (str, optional): Activation function to use. Defaults to None.
+ causal (bool, optional): Whether to use causal padding in the convolution layers. Defaults to False.
+ """
+
+ def __init__(
+ self,
+ in_channels,
+ out_channels,
+ filter_channels,
+ kernel_size,
+ p_dropout=0.0,
+ activation=None,
+ causal=False,
+ ):
+ super().__init__()
+ self.in_channels = in_channels
+ self.out_channels = out_channels
+ self.filter_channels = filter_channels
+ self.kernel_size = kernel_size
+ self.p_dropout = p_dropout
+ self.activation = activation
+ self.causal = causal
+
+ if causal:
+ self.padding = self._causal_padding
+ else:
+ self.padding = self._same_padding
+
+ self.conv_1 = torch.nn.Conv1d(in_channels, filter_channels, kernel_size)
+ self.conv_2 = torch.nn.Conv1d(filter_channels, out_channels, kernel_size)
+ self.drop = torch.nn.Dropout(p_dropout)
+
+ def forward(self, x, x_mask):
+ x = self.conv_1(self.padding(x * x_mask))
+ if self.activation == "gelu":
+ x = x * torch.sigmoid(1.702 * x)
+ else:
+ x = torch.relu(x)
+ x = self.drop(x)
+ x = self.conv_2(self.padding(x * x_mask))
+ return x * x_mask
+
+ def _causal_padding(self, x):
+ if self.kernel_size == 1:
+ return x
+ pad_l = self.kernel_size - 1
+ pad_r = 0
+ padding = [[0, 0], [0, 0], [pad_l, pad_r]]
+ x = torch.nn.functional.pad(x, convert_pad_shape(padding))
+ return x
+
+ def _same_padding(self, x):
+ if self.kernel_size == 1:
+ return x
+ pad_l = (self.kernel_size - 1) // 2
+ pad_r = self.kernel_size // 2
+ padding = [[0, 0], [0, 0], [pad_l, pad_r]]
+ x = torch.nn.functional.pad(x, convert_pad_shape(padding))
+ return x
diff --git a/rvc/lib/algorithm/commons.py b/rvc/lib/algorithm/commons.py
new file mode 100644
index 0000000000000000000000000000000000000000..c76328c9188d48b296a297c0599a4d825dc9150f
--- /dev/null
+++ b/rvc/lib/algorithm/commons.py
@@ -0,0 +1,225 @@
+import math
+import torch
+from typing import List, Optional
+
+
+def init_weights(m, mean=0.0, std=0.01):
+ """
+ Initialize the weights of a module.
+
+ Args:
+ m: The module to initialize.
+ mean: The mean of the normal distribution.
+ std: The standard deviation of the normal distribution.
+ """
+ classname = m.__class__.__name__
+ if classname.find("Conv") != -1:
+ m.weight.data.normal_(mean, std)
+
+
+def get_padding(kernel_size, dilation=1):
+ """
+ Calculate the padding needed for a convolution.
+
+ Args:
+ kernel_size: The size of the kernel.
+ dilation: The dilation of the convolution.
+ """
+ return int((kernel_size * dilation - dilation) / 2)
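+
+# Example: get_padding(5) == 2 and get_padding(3, dilation=2) == 2, which keeps
+# a stride-1 convolution's output length equal to its input length.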
+
+
+def convert_pad_shape(pad_shape):
+ """
+ Convert the pad shape to a list of integers.
+
+ Args:
+        pad_shape: The pad shape.
+ """
+ l = pad_shape[::-1]
+ pad_shape = [item for sublist in l for item in sublist]
+ return pad_shape
+
+
+def kl_divergence(m_p, logs_p, m_q, logs_q):
+ """
+ Calculate the KL divergence between two distributions.
+
+ Args:
+ m_p: The mean of the first distribution.
+ logs_p: The log of the standard deviation of the first distribution.
+ m_q: The mean of the second distribution.
+ logs_q: The log of the standard deviation of the second distribution.
+ """
+ kl = (logs_q - logs_p) - 0.5
+ kl += (
+ 0.5 * (torch.exp(2.0 * logs_p) + ((m_p - m_q) ** 2)) * torch.exp(-2.0 * logs_q)
+ )
+ return kl
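+
+# Closed-form KL(P || Q) between diagonal Gaussians:
+# KL = log(sigma_q / sigma_p) - 1/2 + (sigma_p^2 + (mu_p - mu_q)^2) / (2 * sigma_q^2)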
+
+
+def slice_segments(
+ x: torch.Tensor, ids_str: torch.Tensor, segment_size: int = 4, dim: int = 2
+):
+ """
+ Slice segments from a tensor, handling tensors with different numbers of dimensions.
+
+ Args:
+ x (torch.Tensor): The tensor to slice.
+ ids_str (torch.Tensor): The starting indices of the segments.
+ segment_size (int, optional): The size of each segment. Defaults to 4.
+ dim (int, optional): The dimension to slice across (2D or 3D tensors). Defaults to 2.
+ """
+ if dim == 2:
+ ret = torch.zeros_like(x[:, :segment_size])
+ elif dim == 3:
+ ret = torch.zeros_like(x[:, :, :segment_size])
+
+ for i in range(x.size(0)):
+ idx_str = ids_str[i].item()
+ idx_end = idx_str + segment_size
+ if dim == 2:
+ ret[i] = x[i, idx_str:idx_end]
+ else:
+ ret[i] = x[i, :, idx_str:idx_end]
+
+ return ret
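+
+# Example: for x of shape [2, 80, 100], ids_str = torch.tensor([10, 50]) and
+# segment_size=20 with dim=3, the result stacks x[0, :, 10:30] and
+# x[1, :, 50:70] into a [2, 80, 20] tensor.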
+
+
+def rand_slice_segments(x, x_lengths=None, segment_size=4):
+ """
+ Randomly slice segments from a tensor.
+
+ Args:
+ x: The tensor to slice.
+ x_lengths: The lengths of the sequences.
+ segment_size: The size of each segment.
+ """
+ b, d, t = x.size()
+ if x_lengths is None:
+ x_lengths = t
+ ids_str_max = x_lengths - segment_size + 1
+ ids_str = (torch.rand([b]).to(device=x.device) * ids_str_max).to(dtype=torch.long)
+ ret = slice_segments(x, ids_str, segment_size, dim=3)
+ return ret, ids_str
+
+
+def get_timing_signal_1d(length, channels, min_timescale=1.0, max_timescale=1.0e4):
+ """
+ Generate a 1D timing signal.
+
+ Args:
+ length: The length of the signal.
+ channels: The number of channels of the signal.
+ min_timescale: The minimum timescale.
+ max_timescale: The maximum timescale.
+ """
+ position = torch.arange(length, dtype=torch.float)
+ num_timescales = channels // 2
+ log_timescale_increment = math.log(float(max_timescale) / float(min_timescale)) / (
+ num_timescales - 1
+ )
+ inv_timescales = min_timescale * torch.exp(
+ torch.arange(num_timescales, dtype=torch.float) * -log_timescale_increment
+ )
+ scaled_time = position.unsqueeze(0) * inv_timescales.unsqueeze(1)
+ signal = torch.cat([torch.sin(scaled_time), torch.cos(scaled_time)], 0)
+ signal = torch.nn.functional.pad(signal, [0, 0, 0, channels % 2])
+ signal = signal.view(1, channels, length)
+ return signal
+
+
+def subsequent_mask(length):
+ """
+ Generate a subsequent mask.
+
+ Args:
+ length: The length of the sequence.
+ """
+ mask = torch.tril(torch.ones(length, length)).unsqueeze(0).unsqueeze(0)
+ return mask
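+
+# Example: subsequent_mask(3) yields a [1, 1, 3, 3] lower-triangular mask, so
+# position i may only attend to positions <= i.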
+
+
+@torch.jit.script
+def fused_add_tanh_sigmoid_multiply(input_a, input_b, n_channels):
+ """
+ Fused add tanh sigmoid multiply operation.
+
+ Args:
+ input_a: The first input tensor.
+ input_b: The second input tensor.
+ n_channels: The number of channels.
+ """
+ n_channels_int = n_channels[0]
+ in_act = input_a + input_b
+ t_act = torch.tanh(in_act[:, :n_channels_int, :])
+ s_act = torch.sigmoid(in_act[:, n_channels_int:, :])
+ acts = t_act * s_act
+ return acts
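+
+# This is the WaveNet-style gated activation tanh(a) * sigmoid(b), computed on
+# the two channel halves of the summed conditioning inputs.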
+
+
+# ZLUDA workaround: identical to the function above, but without @torch.jit.script
+def fused_add_tanh_sigmoid_multiply_no_jit(input_a, input_b, n_channels):
+ """
+ Fused add tanh sigmoid multiply operation.
+
+ Args:
+ input_a: The first input tensor.
+ input_b: The second input tensor.
+ n_channels: The number of channels.
+ """
+ n_channels_int = n_channels[0]
+ in_act = input_a + input_b
+ t_act = torch.tanh(in_act[:, :n_channels_int, :])
+ s_act = torch.sigmoid(in_act[:, n_channels_int:, :])
+ acts = t_act * s_act
+ return acts
+
+
+def convert_pad_shape(pad_shape: List[List[int]]) -> List[int]:
+ """
+ Convert the pad shape to a list of integers.
+
+ Args:
+ pad_shape: The pad shape.
+ """
+ return torch.tensor(pad_shape).flip(0).reshape(-1).int().tolist()
+
+
+def sequence_mask(length: torch.Tensor, max_length: Optional[int] = None):
+ """
+ Generate a sequence mask.
+
+ Args:
+ length: The lengths of the sequences.
+ max_length: The maximum length of the sequences.
+ """
+ if max_length is None:
+ max_length = length.max()
+ x = torch.arange(max_length, dtype=length.dtype, device=length.device)
+ return x.unsqueeze(0) < length.unsqueeze(1)
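+
+# Example: sequence_mask(torch.tensor([2, 4])) gives
+# [[True, True, False, False], [True, True, True, True]] (max_length
+# defaults to 4, the batch maximum).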
+
+
+def clip_grad_value(parameters, clip_value, norm_type=2):
+ """
+ Clip the gradients of a list of parameters.
+
+ Args:
+ parameters: The list of parameters to clip.
+ clip_value: The maximum value of the gradients.
+ norm_type: The type of norm to use for clipping.
+ """
+ if isinstance(parameters, torch.Tensor):
+ parameters = [parameters]
+ parameters = list(filter(lambda p: p.grad is not None, parameters))
+ norm_type = float(norm_type)
+ if clip_value is not None:
+ clip_value = float(clip_value)
+
+ total_norm = 0
+ for p in parameters:
+ param_norm = p.grad.data.norm(norm_type)
+ total_norm += param_norm.item() ** norm_type
+ if clip_value is not None:
+ p.grad.data.clamp_(min=-clip_value, max=clip_value)
+ total_norm = total_norm ** (1.0 / norm_type)
+ return total_norm
diff --git a/rvc/lib/algorithm/discriminators.py b/rvc/lib/algorithm/discriminators.py
new file mode 100644
index 0000000000000000000000000000000000000000..23f8c689464d9fcac3f07f7a04dc241627d926d7
--- /dev/null
+++ b/rvc/lib/algorithm/discriminators.py
@@ -0,0 +1,199 @@
+import torch
+from torch.nn.utils.parametrizations import spectral_norm, weight_norm
+
+from rvc.lib.algorithm.commons import get_padding
+from rvc.lib.algorithm.residuals import LRELU_SLOPE
+
+
+class MultiPeriodDiscriminator(torch.nn.Module):
+ """
+ Multi-period discriminator.
+
+ This class implements a multi-period discriminator, which is used to
+ discriminate between real and fake audio signals. The discriminator
+ is composed of a series of convolutional layers that are applied to
+ the input signal at different periods.
+
+ Args:
+ use_spectral_norm (bool): Whether to use spectral normalization.
+ Defaults to False.
+ """
+
+ def __init__(self, use_spectral_norm=False):
+ super(MultiPeriodDiscriminator, self).__init__()
+ periods = [2, 3, 5, 7, 11, 17]
+ self.discriminators = torch.nn.ModuleList(
+ [DiscriminatorS(use_spectral_norm=use_spectral_norm)]
+ + [DiscriminatorP(p, use_spectral_norm=use_spectral_norm) for p in periods]
+ )
+
+ def forward(self, y, y_hat):
+ """
+ Forward pass of the multi-period discriminator.
+
+ Args:
+ y (torch.Tensor): Real audio signal.
+ y_hat (torch.Tensor): Fake audio signal.
+ """
+ y_d_rs, y_d_gs, fmap_rs, fmap_gs = [], [], [], []
+ for d in self.discriminators:
+ y_d_r, fmap_r = d(y)
+ y_d_g, fmap_g = d(y_hat)
+ y_d_rs.append(y_d_r)
+ y_d_gs.append(y_d_g)
+ fmap_rs.append(fmap_r)
+ fmap_gs.append(fmap_g)
+
+ return y_d_rs, y_d_gs, fmap_rs, fmap_gs
+
+
+class MultiPeriodDiscriminatorV2(torch.nn.Module):
+ """
+ Multi-period discriminator V2.
+
+ This class implements a multi-period discriminator V2, which is used
+ to discriminate between real and fake audio signals. The discriminator
+ is composed of a series of convolutional layers that are applied to
+ the input signal at different periods.
+
+ Args:
+ use_spectral_norm (bool): Whether to use spectral normalization.
+ Defaults to False.
+ """
+
+ def __init__(self, use_spectral_norm=False):
+ super(MultiPeriodDiscriminatorV2, self).__init__()
+ periods = [2, 3, 5, 7, 11, 17, 23, 37]
+ self.discriminators = torch.nn.ModuleList(
+ [DiscriminatorS(use_spectral_norm=use_spectral_norm)]
+ + [DiscriminatorP(p, use_spectral_norm=use_spectral_norm) for p in periods]
+ )
+
+ def forward(self, y, y_hat):
+ """
+ Forward pass of the multi-period discriminator V2.
+
+ Args:
+ y (torch.Tensor): Real audio signal.
+ y_hat (torch.Tensor): Fake audio signal.
+ """
+ y_d_rs, y_d_gs, fmap_rs, fmap_gs = [], [], [], []
+ for d in self.discriminators:
+ y_d_r, fmap_r = d(y)
+ y_d_g, fmap_g = d(y_hat)
+ y_d_rs.append(y_d_r)
+ y_d_gs.append(y_d_g)
+ fmap_rs.append(fmap_r)
+ fmap_gs.append(fmap_g)
+
+ return y_d_rs, y_d_gs, fmap_rs, fmap_gs
+
+
+class DiscriminatorS(torch.nn.Module):
+ """
+ Discriminator for the short-term component.
+
+ This class implements a discriminator for the short-term component
+ of the audio signal. The discriminator is composed of a series of
+ convolutional layers that are applied to the input signal.
+ """
+
+ def __init__(self, use_spectral_norm=False):
+ super(DiscriminatorS, self).__init__()
+ norm_f = spectral_norm if use_spectral_norm else weight_norm
+ self.convs = torch.nn.ModuleList(
+ [
+ norm_f(torch.nn.Conv1d(1, 16, 15, 1, padding=7)),
+ norm_f(torch.nn.Conv1d(16, 64, 41, 4, groups=4, padding=20)),
+ norm_f(torch.nn.Conv1d(64, 256, 41, 4, groups=16, padding=20)),
+ norm_f(torch.nn.Conv1d(256, 1024, 41, 4, groups=64, padding=20)),
+ norm_f(torch.nn.Conv1d(1024, 1024, 41, 4, groups=256, padding=20)),
+ norm_f(torch.nn.Conv1d(1024, 1024, 5, 1, padding=2)),
+ ]
+ )
+ self.conv_post = norm_f(torch.nn.Conv1d(1024, 1, 3, 1, padding=1))
+ self.lrelu = torch.nn.LeakyReLU(LRELU_SLOPE)
+
+ def forward(self, x):
+ """
+ Forward pass of the discriminator.
+
+ Args:
+ x (torch.Tensor): Input audio signal.
+ """
+ fmap = []
+ for conv in self.convs:
+ x = self.lrelu(conv(x))
+ fmap.append(x)
+ x = self.conv_post(x)
+ fmap.append(x)
+ x = torch.flatten(x, 1, -1)
+ return x, fmap
+
+
+class DiscriminatorP(torch.nn.Module):
+ """
+ Discriminator for the long-term component.
+
+ This class implements a discriminator for the long-term component
+ of the audio signal. The discriminator is composed of a series of
+ convolutional layers that are applied to the input signal at a given
+ period.
+
+ Args:
+ period (int): Period of the discriminator.
+ kernel_size (int): Kernel size of the convolutional layers.
+ Defaults to 5.
+ stride (int): Stride of the convolutional layers. Defaults to 3.
+ use_spectral_norm (bool): Whether to use spectral normalization.
+ Defaults to False.
+ """
+
+ def __init__(self, period, kernel_size=5, stride=3, use_spectral_norm=False):
+ super(DiscriminatorP, self).__init__()
+ self.period = period
+ norm_f = spectral_norm if use_spectral_norm else weight_norm
+
+ in_channels = [1, 32, 128, 512, 1024]
+ out_channels = [32, 128, 512, 1024, 1024]
+
+ self.convs = torch.nn.ModuleList(
+ [
+ norm_f(
+ torch.nn.Conv2d(
+ in_ch,
+ out_ch,
+ (kernel_size, 1),
+ (stride, 1),
+ padding=(get_padding(kernel_size, 1), 0),
+ )
+ )
+ for in_ch, out_ch in zip(in_channels, out_channels)
+ ]
+ )
+
+ self.conv_post = norm_f(torch.nn.Conv2d(1024, 1, (3, 1), 1, padding=(1, 0)))
+ self.lrelu = torch.nn.LeakyReLU(LRELU_SLOPE)
+
+ def forward(self, x):
+ """
+ Forward pass of the discriminator.
+
+ Args:
+ x (torch.Tensor): Input audio signal.
+ """
+ fmap = []
+ b, c, t = x.shape
+ if t % self.period != 0:
+ n_pad = self.period - (t % self.period)
+ x = torch.nn.functional.pad(x, (0, n_pad), "reflect")
+ x = x.view(b, c, -1, self.period)
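+        # e.g. with t=100 and period=3, the signal is reflect-padded by 2
+        # samples and viewed as [b, c, 34, 3], so each 2D column holds every
+        # period-th sample.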
+
+ for conv in self.convs:
+ x = self.lrelu(conv(x))
+ fmap.append(x)
+
+ x = self.conv_post(x)
+ fmap.append(x)
+ x = torch.flatten(x, 1, -1)
+ return x, fmap
diff --git a/rvc/lib/algorithm/encoders.py b/rvc/lib/algorithm/encoders.py
new file mode 100644
index 0000000000000000000000000000000000000000..d0823a47e8e7fc7592e4c82a0b1fa7796af169a0
--- /dev/null
+++ b/rvc/lib/algorithm/encoders.py
@@ -0,0 +1,219 @@
+import math
+import torch
+from typing import Optional
+
+from rvc.lib.algorithm.commons import sequence_mask
+from rvc.lib.algorithm.modules import WaveNet
+from rvc.lib.algorithm.normalization import LayerNorm
+from rvc.lib.algorithm.attentions import FFN, MultiHeadAttention
+
+
+class Encoder(torch.nn.Module):
+ """
+ Encoder module for the Transformer model.
+
+ Args:
+ hidden_channels (int): Number of hidden channels in the encoder.
+ filter_channels (int): Number of filter channels in the feed-forward network.
+ n_heads (int): Number of attention heads.
+ n_layers (int): Number of encoder layers.
+ kernel_size (int, optional): Kernel size of the convolution layers in the feed-forward network. Defaults to 1.
+ p_dropout (float, optional): Dropout probability. Defaults to 0.0.
+ window_size (int, optional): Window size for relative positional encoding. Defaults to 10.
+ """
+
+ def __init__(
+ self,
+ hidden_channels,
+ filter_channels,
+ n_heads,
+ n_layers,
+ kernel_size=1,
+ p_dropout=0.0,
+ window_size=10,
+ **kwargs
+ ):
+ super().__init__()
+ self.hidden_channels = hidden_channels
+ self.filter_channels = filter_channels
+ self.n_heads = n_heads
+ self.n_layers = n_layers
+ self.kernel_size = kernel_size
+ self.p_dropout = p_dropout
+ self.window_size = window_size
+
+ self.drop = torch.nn.Dropout(p_dropout)
+ self.attn_layers = torch.nn.ModuleList()
+ self.norm_layers_1 = torch.nn.ModuleList()
+ self.ffn_layers = torch.nn.ModuleList()
+ self.norm_layers_2 = torch.nn.ModuleList()
+ for i in range(self.n_layers):
+ self.attn_layers.append(
+ MultiHeadAttention(
+ hidden_channels,
+ hidden_channels,
+ n_heads,
+ p_dropout=p_dropout,
+ window_size=window_size,
+ )
+ )
+ self.norm_layers_1.append(LayerNorm(hidden_channels))
+ self.ffn_layers.append(
+ FFN(
+ hidden_channels,
+ hidden_channels,
+ filter_channels,
+ kernel_size,
+ p_dropout=p_dropout,
+ )
+ )
+ self.norm_layers_2.append(LayerNorm(hidden_channels))
+
+ def forward(self, x, x_mask):
+ attn_mask = x_mask.unsqueeze(2) * x_mask.unsqueeze(-1)
+ x = x * x_mask
+ for i in range(self.n_layers):
+ y = self.attn_layers[i](x, x, attn_mask)
+ y = self.drop(y)
+ x = self.norm_layers_1[i](x + y)
+
+ y = self.ffn_layers[i](x, x_mask)
+ y = self.drop(y)
+ x = self.norm_layers_2[i](x + y)
+ x = x * x_mask
+ return x
+
+
+class TextEncoder(torch.nn.Module):
+ """Text Encoder with configurable embedding dimension.
+
+ Args:
+ out_channels (int): Output channels of the encoder.
+ hidden_channels (int): Hidden channels of the encoder.
+ filter_channels (int): Filter channels of the encoder.
+ n_heads (int): Number of attention heads.
+ n_layers (int): Number of encoder layers.
+ kernel_size (int): Kernel size of the convolutional layers.
+ p_dropout (float): Dropout probability.
+ embedding_dim (int): Embedding dimension for phone embeddings (v1 = 256, v2 = 768).
+ f0 (bool, optional): Whether to use F0 embedding. Defaults to True.
+ """
+
+ def __init__(
+ self,
+ out_channels,
+ hidden_channels,
+ filter_channels,
+ n_heads,
+ n_layers,
+ kernel_size,
+ p_dropout,
+ embedding_dim,
+ f0=True,
+ ):
+ super(TextEncoder, self).__init__()
+ self.out_channels = out_channels
+ self.hidden_channels = hidden_channels
+ self.filter_channels = filter_channels
+ self.n_heads = n_heads
+ self.n_layers = n_layers
+ self.kernel_size = kernel_size
+ self.p_dropout = float(p_dropout)
+ self.emb_phone = torch.nn.Linear(embedding_dim, hidden_channels)
+ self.lrelu = torch.nn.LeakyReLU(0.1, inplace=True)
+ if f0:
+ self.emb_pitch = torch.nn.Embedding(256, hidden_channels)
+ self.encoder = Encoder(
+ hidden_channels,
+ filter_channels,
+ n_heads,
+ n_layers,
+ kernel_size,
+ float(p_dropout),
+ )
+ self.proj = torch.nn.Conv1d(hidden_channels, out_channels * 2, 1)
+
+ def forward(
+ self, phone: torch.Tensor, pitch: Optional[torch.Tensor], lengths: torch.Tensor
+ ):
+ if pitch is None:
+ x = self.emb_phone(phone)
+ else:
+ x = self.emb_phone(phone) + self.emb_pitch(pitch)
+ x = x * math.sqrt(self.hidden_channels) # [b, t, h]
+ x = self.lrelu(x)
+ x = torch.transpose(x, 1, -1) # [b, h, t]
+ x_mask = torch.unsqueeze(sequence_mask(lengths, x.size(2)), 1).to(x.dtype)
+ x = self.encoder(x * x_mask, x_mask)
+ stats = self.proj(x) * x_mask
+
+ m, logs = torch.split(stats, self.out_channels, dim=1)
+ return m, logs, x_mask
+
+
+class PosteriorEncoder(torch.nn.Module):
+ """Posterior Encoder for inferring latent representation.
+
+ Args:
+ in_channels (int): Number of channels in the input.
+ out_channels (int): Number of channels in the output.
+ hidden_channels (int): Number of hidden channels in the encoder.
+ kernel_size (int): Kernel size of the convolutional layers.
+ dilation_rate (int): Dilation rate of the convolutional layers.
+ n_layers (int): Number of layers in the encoder.
+ gin_channels (int, optional): Number of channels for the global conditioning input. Defaults to 0.
+ """
+
+ def __init__(
+ self,
+ in_channels,
+ out_channels,
+ hidden_channels,
+ kernel_size,
+ dilation_rate,
+ n_layers,
+ gin_channels=0,
+ ):
+ super(PosteriorEncoder, self).__init__()
+ self.in_channels = in_channels
+ self.out_channels = out_channels
+ self.hidden_channels = hidden_channels
+ self.kernel_size = kernel_size
+ self.dilation_rate = dilation_rate
+ self.n_layers = n_layers
+ self.gin_channels = gin_channels
+
+ self.pre = torch.nn.Conv1d(in_channels, hidden_channels, 1)
+ self.enc = WaveNet(
+ hidden_channels,
+ kernel_size,
+ dilation_rate,
+ n_layers,
+ gin_channels=gin_channels,
+ )
+ self.proj = torch.nn.Conv1d(hidden_channels, out_channels * 2, 1)
+
+ def forward(
+ self, x: torch.Tensor, x_lengths: torch.Tensor, g: Optional[torch.Tensor] = None
+ ):
+ x_mask = torch.unsqueeze(sequence_mask(x_lengths, x.size(2)), 1).to(x.dtype)
+ x = self.pre(x) * x_mask
+ x = self.enc(x, x_mask, g=g)
+ stats = self.proj(x) * x_mask
+ m, logs = torch.split(stats, self.out_channels, dim=1)
+ z = (m + torch.randn_like(m) * torch.exp(logs)) * x_mask
+ return z, m, logs, x_mask
+
+ def remove_weight_norm(self):
+ """Removes weight normalization from the encoder."""
+ self.enc.remove_weight_norm()
+
+ def __prepare_scriptable__(self):
+ """Prepares the module for scripting."""
+ for hook in self.enc._forward_pre_hooks.values():
+ if (
+ hook.__module__ == "torch.nn.utils.parametrizations.weight_norm"
+ and hook.__class__.__name__ == "WeightNorm"
+ ):
+ torch.nn.utils.remove_weight_norm(self.enc)
+ return self
diff --git a/rvc/lib/algorithm/generators.py b/rvc/lib/algorithm/generators.py
new file mode 100644
index 0000000000000000000000000000000000000000..75fc2ad106de7a7333f2d610e61b4441cbd98bbb
--- /dev/null
+++ b/rvc/lib/algorithm/generators.py
@@ -0,0 +1,195 @@
+import torch
+from torch.nn.utils import remove_weight_norm
+from torch.nn.utils.parametrizations import weight_norm
+from typing import Optional
+
+from rvc.lib.algorithm.residuals import LRELU_SLOPE, ResBlock1, ResBlock2
+from rvc.lib.algorithm.commons import init_weights
+
+
+class Generator(torch.nn.Module):
+ """Generator for synthesizing audio. Optimized for performance and quality.
+
+ Args:
+ initial_channel (int): Number of channels in the initial convolutional layer.
+ resblock (str): Type of residual block to use (1 or 2).
+ resblock_kernel_sizes (list): Kernel sizes of the residual blocks.
+ resblock_dilation_sizes (list): Dilation rates of the residual blocks.
+ upsample_rates (list): Upsampling rates.
+ upsample_initial_channel (int): Number of channels in the initial upsampling layer.
+ upsample_kernel_sizes (list): Kernel sizes of the upsampling layers.
+ gin_channels (int, optional): Number of channels for the global conditioning input. Defaults to 0.
+ """
+
+ def __init__(
+ self,
+ initial_channel,
+ resblock,
+ resblock_kernel_sizes,
+ resblock_dilation_sizes,
+ upsample_rates,
+ upsample_initial_channel,
+ upsample_kernel_sizes,
+ gin_channels=0,
+ ):
+ super(Generator, self).__init__()
+ self.num_kernels = len(resblock_kernel_sizes)
+ self.num_upsamples = len(upsample_rates)
+ self.conv_pre = torch.nn.Conv1d(
+ initial_channel, upsample_initial_channel, 7, 1, padding=3
+ )
+ resblock = ResBlock1 if resblock == "1" else ResBlock2
+
+ self.ups_and_resblocks = torch.nn.ModuleList()
+ for i, (u, k) in enumerate(zip(upsample_rates, upsample_kernel_sizes)):
+ self.ups_and_resblocks.append(
+ weight_norm(
+ torch.nn.ConvTranspose1d(
+ upsample_initial_channel // (2**i),
+ upsample_initial_channel // (2 ** (i + 1)),
+ k,
+ u,
+ padding=(k - u) // 2,
+ )
+ )
+ )
+ ch = upsample_initial_channel // (2 ** (i + 1))
+            for k, d in zip(resblock_kernel_sizes, resblock_dilation_sizes):
+ self.ups_and_resblocks.append(resblock(ch, k, d))
+
+ self.conv_post = torch.nn.Conv1d(ch, 1, 7, 1, padding=3, bias=False)
+ self.ups_and_resblocks.apply(init_weights)
+
+ if gin_channels != 0:
+ self.cond = torch.nn.Conv1d(gin_channels, upsample_initial_channel, 1)
+
+ def forward(self, x: torch.Tensor, g: Optional[torch.Tensor] = None):
+ x = self.conv_pre(x)
+ if g is not None:
+ x = x + self.cond(g)
+
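+        # ups_and_resblocks interleaves, per stage, one upsampler followed by
+        # num_kernels residual blocks whose outputs are averaged below.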
+ resblock_idx = 0
+ for _ in range(self.num_upsamples):
+ x = torch.nn.functional.leaky_relu(x, LRELU_SLOPE)
+ x = self.ups_and_resblocks[resblock_idx](x)
+ resblock_idx += 1
+ xs = 0
+ for _ in range(self.num_kernels):
+ xs += self.ups_and_resblocks[resblock_idx](x)
+ resblock_idx += 1
+ x = xs / self.num_kernels
+
+ x = torch.nn.functional.leaky_relu(x)
+ x = self.conv_post(x)
+ x = torch.tanh(x)
+
+ return x
+
+ def __prepare_scriptable__(self):
+ """Prepares the module for scripting."""
+ for l in self.ups_and_resblocks:
+ for hook in l._forward_pre_hooks.values():
+ if (
+ hook.__module__ == "torch.nn.utils.parametrizations.weight_norm"
+ and hook.__class__.__name__ == "WeightNorm"
+ ):
+ torch.nn.utils.remove_weight_norm(l)
+ return self
+
+ def remove_weight_norm(self):
+ """Removes weight normalization from the upsampling and residual blocks."""
+ for l in self.ups_and_resblocks:
+ remove_weight_norm(l)
+
+
+class SineGen(torch.nn.Module):
+ """Sine wave generator.
+
+ Args:
+ samp_rate (int): Sampling rate in Hz.
+ harmonic_num (int, optional): Number of harmonic overtones. Defaults to 0.
+ sine_amp (float, optional): Amplitude of sine waveform. Defaults to 0.1.
+ noise_std (float, optional): Standard deviation of Gaussian noise. Defaults to 0.003.
+ voiced_threshold (float, optional): F0 threshold for voiced/unvoiced classification. Defaults to 0.
+        flag_for_pulse (bool, optional): Accepted for API compatibility; unused here. Defaults to False.
+ """
+
+ def __init__(
+ self,
+ samp_rate,
+ harmonic_num=0,
+ sine_amp=0.1,
+ noise_std=0.003,
+ voiced_threshold=0,
+ flag_for_pulse=False,
+ ):
+ super(SineGen, self).__init__()
+ self.sine_amp = sine_amp
+ self.noise_std = noise_std
+ self.harmonic_num = harmonic_num
+ self.dim = self.harmonic_num + 1
+ self.sample_rate = samp_rate
+ self.voiced_threshold = voiced_threshold
+
+ def _f02uv(self, f0):
+ """Converts F0 to voiced/unvoiced signal.
+
+ Args:
+            f0 (torch.Tensor): F0 tensor with shape (batch_size, length, 1).
+ """
+ uv = torch.ones_like(f0)
+ uv = uv * (f0 > self.voiced_threshold)
+ return uv
+
+ def forward(self, f0: torch.Tensor, upp: int):
+ """Generates sine waves.
+
+ Args:
+ f0 (torch.Tensor): F0 tensor with shape (batch_size, length, 1).
+ upp (int): Upsampling factor.
+ """
+ with torch.no_grad():
+ f0 = f0[:, None].transpose(1, 2)
+ f0_buf = torch.zeros(f0.shape[0], f0.shape[1], self.dim, device=f0.device)
+ f0_buf[:, :, 0] = f0[:, :, 0]
+ f0_buf[:, :, 1:] = (
+ f0_buf[:, :, 0:1]
+ * torch.arange(2, self.harmonic_num + 2, device=f0.device)[
+ None, None, :
+ ]
+ )
+ rad_values = (f0_buf / float(self.sample_rate)) % 1
+ rand_ini = torch.rand(
+ f0_buf.shape[0], f0_buf.shape[2], device=f0_buf.device
+ )
+ rand_ini[:, 0] = 0
+ rad_values[:, 0, :] = rad_values[:, 0, :] + rand_ini
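+            # rad_values holds per-frame phase increments (f0 / sample_rate, in
+            # revolutions) with a random initial phase per harmonic; the
+            # cumulative-sum shift computed below removes wrap discontinuities
+            # so the sine argument stays continuous after upsampling.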
+ tmp_over_one = torch.cumsum(rad_values, 1)
+ tmp_over_one *= upp
+ tmp_over_one = torch.nn.functional.interpolate(
+ tmp_over_one.transpose(2, 1),
+ scale_factor=float(upp),
+ mode="linear",
+ align_corners=True,
+ ).transpose(2, 1)
+ rad_values = torch.nn.functional.interpolate(
+ rad_values.transpose(2, 1), scale_factor=float(upp), mode="nearest"
+ ).transpose(2, 1)
+ tmp_over_one %= 1
+ tmp_over_one_idx = (tmp_over_one[:, 1:, :] - tmp_over_one[:, :-1, :]) < 0
+ cumsum_shift = torch.zeros_like(rad_values)
+ cumsum_shift[:, 1:, :] = tmp_over_one_idx * -1.0
+ sine_waves = torch.sin(
+ torch.cumsum(rad_values + cumsum_shift, dim=1) * 2 * torch.pi
+ )
+ sine_waves = sine_waves * self.sine_amp
+ uv = self._f02uv(f0)
+ uv = torch.nn.functional.interpolate(
+ uv.transpose(2, 1), scale_factor=float(upp), mode="nearest"
+ ).transpose(2, 1)
+ noise_amp = uv * self.noise_std + (1 - uv) * self.sine_amp / 3
+ noise = noise_amp * torch.randn_like(sine_waves)
+ sine_waves = sine_waves * uv + noise
+ return sine_waves, uv, noise
diff --git a/rvc/lib/algorithm/modules.py b/rvc/lib/algorithm/modules.py
new file mode 100644
index 0000000000000000000000000000000000000000..7a9bc5414c5f309e957a0f45837c1bdb06051983
--- /dev/null
+++ b/rvc/lib/algorithm/modules.py
@@ -0,0 +1,130 @@
+import torch
+from rvc.lib.algorithm.commons import (
+ fused_add_tanh_sigmoid_multiply_no_jit,
+ fused_add_tanh_sigmoid_multiply,
+)
+
+
+class WaveNet(torch.nn.Module):
+ """WaveNet residual blocks as used in WaveGlow
+
+ Args:
+ hidden_channels (int): Number of hidden channels.
+ kernel_size (int): Size of the convolutional kernel.
+ dilation_rate (int): Dilation rate of the convolution.
+ n_layers (int): Number of convolutional layers.
+ gin_channels (int, optional): Number of conditioning channels. Defaults to 0.
+ p_dropout (float, optional): Dropout probability. Defaults to 0.
+ """
+
+ def __init__(
+ self,
+ hidden_channels,
+ kernel_size,
+ dilation_rate,
+ n_layers,
+ gin_channels=0,
+ p_dropout=0,
+ ):
+ super(WaveNet, self).__init__()
+ assert kernel_size % 2 == 1
+ self.hidden_channels = hidden_channels
+        self.kernel_size = kernel_size
+ self.dilation_rate = dilation_rate
+ self.n_layers = n_layers
+ self.gin_channels = gin_channels
+ self.p_dropout = p_dropout
+
+ self.in_layers = torch.nn.ModuleList()
+ self.res_skip_layers = torch.nn.ModuleList()
+ self.drop = torch.nn.Dropout(p_dropout)
+
+ if gin_channels != 0:
+ cond_layer = torch.nn.Conv1d(
+ gin_channels, 2 * hidden_channels * n_layers, 1
+ )
+ self.cond_layer = torch.nn.utils.parametrizations.weight_norm(
+ cond_layer, name="weight"
+ )
+
+ dilations = [dilation_rate**i for i in range(n_layers)]
+ paddings = [(kernel_size * d - d) // 2 for d in dilations]
+
+ for i in range(n_layers):
+ in_layer = torch.nn.Conv1d(
+ hidden_channels,
+ 2 * hidden_channels,
+ kernel_size,
+ dilation=dilations[i],
+ padding=paddings[i],
+ )
+ in_layer = torch.nn.utils.parametrizations.weight_norm(
+ in_layer, name="weight"
+ )
+ self.in_layers.append(in_layer)
+
+ res_skip_channels = (
+ hidden_channels if i == n_layers - 1 else 2 * hidden_channels
+ )
+
+ res_skip_layer = torch.nn.Conv1d(hidden_channels, res_skip_channels, 1)
+ res_skip_layer = torch.nn.utils.parametrizations.weight_norm(
+ res_skip_layer, name="weight"
+ )
+ self.res_skip_layers.append(res_skip_layer)
+
+ def forward(self, x, x_mask, g=None, **kwargs):
+ """Forward pass.
+
+ Args:
+ x (torch.Tensor): Input tensor of shape (batch_size, hidden_channels, time_steps).
+ x_mask (torch.Tensor): Mask tensor of shape (batch_size, 1, time_steps).
+ g (torch.Tensor, optional): Conditioning tensor of shape (batch_size, gin_channels, time_steps).
+ Defaults to None.
+ """
+ output = torch.zeros_like(x)
+ n_channels_tensor = torch.IntTensor([self.hidden_channels])
+
+ if g is not None:
+ g = self.cond_layer(g)
+
+ # Zluda
+ is_zluda = x.device.type == "cuda" and torch.cuda.get_device_name().endswith(
+ "[ZLUDA]"
+ )
+
+ for i in range(self.n_layers):
+ x_in = self.in_layers[i](x)
+ if g is not None:
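+                # Each layer reads its own 2*hidden_channels slice of the
+                # pre-computed conditioning tensor.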
+ cond_offset = i * 2 * self.hidden_channels
+ g_l = g[:, cond_offset : cond_offset + 2 * self.hidden_channels, :]
+ else:
+ g_l = torch.zeros_like(x_in)
+
+ # Preventing HIP crash by not using jit-decorated function
+ if is_zluda:
+ acts = fused_add_tanh_sigmoid_multiply_no_jit(
+ x_in, g_l, n_channels_tensor
+ )
+ else:
+ acts = fused_add_tanh_sigmoid_multiply(x_in, g_l, n_channels_tensor)
+
+ acts = self.drop(acts)
+
+ res_skip_acts = self.res_skip_layers[i](acts)
+ if i < self.n_layers - 1:
+ res_acts = res_skip_acts[:, : self.hidden_channels, :]
+ x = (x + res_acts) * x_mask
+ output = output + res_skip_acts[:, self.hidden_channels :, :]
+ else:
+ output = output + res_skip_acts
+ return output * x_mask
+
+ def remove_weight_norm(self):
+ """Remove weight normalization from the module."""
+ if self.gin_channels != 0:
+ torch.nn.utils.remove_weight_norm(self.cond_layer)
+ for l in self.in_layers:
+ torch.nn.utils.remove_weight_norm(l)
+ for l in self.res_skip_layers:
+ torch.nn.utils.remove_weight_norm(l)
diff --git a/rvc/lib/algorithm/normalization.py b/rvc/lib/algorithm/normalization.py
new file mode 100644
index 0000000000000000000000000000000000000000..878ec09de09b021bc9a2b92def21e07d42f34c75
--- /dev/null
+++ b/rvc/lib/algorithm/normalization.py
@@ -0,0 +1,31 @@
+import torch
+
+
+class LayerNorm(torch.nn.Module):
+ """Layer normalization module.
+
+ Args:
+ channels (int): Number of channels.
+ eps (float, optional): Epsilon value for numerical stability. Defaults to 1e-5.
+ """
+
+ def __init__(self, channels, eps=1e-5):
+ super().__init__()
+ self.eps = eps
+ self.gamma = torch.nn.Parameter(torch.ones(channels))
+ self.beta = torch.nn.Parameter(torch.zeros(channels))
+
+ def forward(self, x):
+ """Forward pass.
+
+ Args:
+ x (torch.Tensor): Input tensor of shape (batch_size, channels, time_steps).
+
+ """
+ # Transpose to (batch_size, time_steps, channels) for layer_norm
+ x = x.transpose(1, -1)
+ x = torch.nn.functional.layer_norm(
+ x, (x.size(-1),), self.gamma, self.beta, self.eps
+ )
+ # Transpose back to (batch_size, channels, time_steps)
+ return x.transpose(1, -1)
diff --git a/rvc/lib/algorithm/nsf.py b/rvc/lib/algorithm/nsf.py
new file mode 100644
index 0000000000000000000000000000000000000000..465e04de55d4881008552df091fcf1800f4ccd94
--- /dev/null
+++ b/rvc/lib/algorithm/nsf.py
@@ -0,0 +1,196 @@
+import math
+import torch
+from torch.nn.utils import remove_weight_norm
+from torch.nn.utils.parametrizations import weight_norm
+from typing import Optional
+
+from rvc.lib.algorithm.generators import SineGen
+from rvc.lib.algorithm.residuals import LRELU_SLOPE, ResBlock1, ResBlock2
+from rvc.lib.algorithm.commons import init_weights
+
+
+class SourceModuleHnNSF(torch.nn.Module):
+ """
+ Source Module for harmonic-plus-noise excitation.
+
+ Args:
+ sample_rate (int): Sampling rate in Hz.
+ harmonic_num (int, optional): Number of harmonics above F0. Defaults to 0.
+ sine_amp (float, optional): Amplitude of sine source signal. Defaults to 0.1.
+ add_noise_std (float, optional): Standard deviation of additive Gaussian noise. Defaults to 0.003.
+        voiced_threshold (float, optional): F0 threshold for voiced/unvoiced classification. Defaults to 0.
+ is_half (bool, optional): Whether to use half precision. Defaults to True.
+ """
+
+ def __init__(
+ self,
+ sample_rate,
+ harmonic_num=0,
+ sine_amp=0.1,
+ add_noise_std=0.003,
+        voiced_threshold=0,
+ is_half=True,
+ ):
+ super(SourceModuleHnNSF, self).__init__()
+
+ self.sine_amp = sine_amp
+ self.noise_std = add_noise_std
+ self.is_half = is_half
+
+ self.l_sin_gen = SineGen(
+            sample_rate, harmonic_num, sine_amp, add_noise_std, voiced_threshold
+ )
+ self.l_linear = torch.nn.Linear(harmonic_num + 1, 1)
+ self.l_tanh = torch.nn.Tanh()
+
+ def forward(self, x: torch.Tensor, upsample_factor: int = 1):
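+        # Generate harmonic sine waves, then merge them into a single
+        # excitation channel via a learned linear layer and tanh.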
+ sine_wavs, uv, _ = self.l_sin_gen(x, upsample_factor)
+ sine_wavs = sine_wavs.to(dtype=self.l_linear.weight.dtype)
+ sine_merge = self.l_tanh(self.l_linear(sine_wavs))
+ return sine_merge, None, None
+
+
+class GeneratorNSF(torch.nn.Module):
+ """
+ Generator for synthesizing audio using the NSF (Neural Source Filter) approach.
+
+ Args:
+ initial_channel (int): Number of channels in the initial convolutional layer.
+ resblock (str): Type of residual block to use (1 or 2).
+ resblock_kernel_sizes (list): Kernel sizes of the residual blocks.
+ resblock_dilation_sizes (list): Dilation rates of the residual blocks.
+ upsample_rates (list): Upsampling rates.
+ upsample_initial_channel (int): Number of channels in the initial upsampling layer.
+ upsample_kernel_sizes (list): Kernel sizes of the upsampling layers.
+ gin_channels (int): Number of channels for the global conditioning input.
+ sr (int): Sampling rate.
+ is_half (bool, optional): Whether to use half precision. Defaults to False.
+ """
+
+ def __init__(
+ self,
+ initial_channel,
+ resblock,
+ resblock_kernel_sizes,
+ resblock_dilation_sizes,
+ upsample_rates,
+ upsample_initial_channel,
+ upsample_kernel_sizes,
+ gin_channels,
+ sr,
+ is_half=False,
+ ):
+ super(GeneratorNSF, self).__init__()
+
+ self.num_kernels = len(resblock_kernel_sizes)
+ self.num_upsamples = len(upsample_rates)
+ self.f0_upsamp = torch.nn.Upsample(scale_factor=math.prod(upsample_rates))
+ self.m_source = SourceModuleHnNSF(
+ sample_rate=sr, harmonic_num=0, is_half=is_half
+ )
+
+ self.conv_pre = torch.nn.Conv1d(
+ initial_channel, upsample_initial_channel, 7, 1, padding=3
+ )
+ resblock_cls = ResBlock1 if resblock == "1" else ResBlock2
+
+ self.ups = torch.nn.ModuleList()
+ self.noise_convs = torch.nn.ModuleList()
+
+ channels = [
+ upsample_initial_channel // (2 ** (i + 1))
+ for i in range(len(upsample_rates))
+ ]
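+        # Stride for the i-th noise conv: the product of the remaining upsample
+        # factors, so the full-rate harmonic source is downsampled to match the
+        # temporal resolution of stage i.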
+ stride_f0s = [
+ math.prod(upsample_rates[i + 1 :]) if i + 1 < len(upsample_rates) else 1
+ for i in range(len(upsample_rates))
+ ]
+
+ for i, (u, k) in enumerate(zip(upsample_rates, upsample_kernel_sizes)):
+ self.ups.append(
+ weight_norm(
+ torch.nn.ConvTranspose1d(
+ upsample_initial_channel // (2**i),
+ channels[i],
+ k,
+ u,
+ padding=(k - u) // 2,
+ )
+ )
+ )
+
+ self.noise_convs.append(
+ torch.nn.Conv1d(
+ 1,
+ channels[i],
+ kernel_size=(stride_f0s[i] * 2 if stride_f0s[i] > 1 else 1),
+ stride=stride_f0s[i],
+ padding=(stride_f0s[i] // 2 if stride_f0s[i] > 1 else 0),
+ )
+ )
+
+ self.resblocks = torch.nn.ModuleList(
+ [
+ resblock_cls(channels[i], k, d)
+ for i in range(len(self.ups))
+ for k, d in zip(resblock_kernel_sizes, resblock_dilation_sizes)
+ ]
+ )
+
+ self.conv_post = torch.nn.Conv1d(channels[-1], 1, 7, 1, padding=3, bias=False)
+ self.ups.apply(init_weights)
+
+ if gin_channels != 0:
+ self.cond = torch.nn.Conv1d(gin_channels, upsample_initial_channel, 1)
+
+ self.upp = math.prod(upsample_rates)
+ self.lrelu_slope = LRELU_SLOPE
+
+ def forward(self, x, f0, g: Optional[torch.Tensor] = None):
+ har_source, _, _ = self.m_source(f0, self.upp)
+ har_source = har_source.transpose(1, 2)
+ x = self.conv_pre(x)
+
+ if g is not None:
+ x = x + self.cond(g)
+
+ for i, (ups, noise_convs) in enumerate(zip(self.ups, self.noise_convs)):
+ x = torch.nn.functional.leaky_relu(x, self.lrelu_slope)
+ x = ups(x)
+ x = x + noise_convs(har_source)
+
+            xs = sum(
+                resblock(x)
+                for resblock in self.resblocks[
+                    i * self.num_kernels : (i + 1) * self.num_kernels
+                ]
+            )
+ x = xs / self.num_kernels
+
+ x = torch.nn.functional.leaky_relu(x)
+ x = torch.tanh(self.conv_post(x))
+ return x
+
+ def remove_weight_norm(self):
+ for l in self.ups:
+ remove_weight_norm(l)
+ for l in self.resblocks:
+ l.remove_weight_norm()
+
+ def __prepare_scriptable__(self):
+ for l in self.ups:
+ for hook in l._forward_pre_hooks.values():
+ if (
+ hook.__module__ == "torch.nn.utils.parametrizations.weight_norm"
+ and hook.__class__.__name__ == "WeightNorm"
+ ):
+ remove_weight_norm(l)
+ for l in self.resblocks:
+ for hook in l._forward_pre_hooks.values():
+ if (
+ hook.__module__ == "torch.nn.utils.parametrizations.weight_norm"
+ and hook.__class__.__name__ == "WeightNorm"
+ ):
+ remove_weight_norm(l)
+ return self
diff --git a/rvc/lib/algorithm/residuals.py b/rvc/lib/algorithm/residuals.py
new file mode 100644
index 0000000000000000000000000000000000000000..aad4f0bd6026eaf951c71a22815c1ee9d6d3e2b3
--- /dev/null
+++ b/rvc/lib/algorithm/residuals.py
@@ -0,0 +1,309 @@
+from typing import Optional
+import torch
+from torch.nn.utils import remove_weight_norm
+from torch.nn.utils.parametrizations import weight_norm
+
+from rvc.lib.algorithm.modules import WaveNet
+from rvc.lib.algorithm.commons import get_padding, init_weights
+
+LRELU_SLOPE = 0.1
+
+
+# Helper functions
+def create_conv1d_layer(channels, kernel_size, dilation):
+ return weight_norm(
+ torch.nn.Conv1d(
+ channels,
+ channels,
+ kernel_size,
+ 1,
+ dilation=dilation,
+ padding=get_padding(kernel_size, dilation),
+ )
+ )
+
+
+def apply_mask(tensor, mask):
+ return tensor * mask if mask is not None else tensor
+
+
+class ResBlockBase(torch.nn.Module):
+ def __init__(self, channels, kernel_size, dilations):
+ super(ResBlockBase, self).__init__()
+ self.convs1 = torch.nn.ModuleList(
+ [create_conv1d_layer(channels, kernel_size, d) for d in dilations]
+ )
+ self.convs1.apply(init_weights)
+
+ self.convs2 = torch.nn.ModuleList(
+ [create_conv1d_layer(channels, kernel_size, 1) for _ in dilations]
+ )
+ self.convs2.apply(init_weights)
+
+ def forward(self, x, x_mask=None):
+ for c1, c2 in zip(self.convs1, self.convs2):
+ xt = torch.nn.functional.leaky_relu(x, LRELU_SLOPE)
+ xt = apply_mask(xt, x_mask)
+ xt = torch.nn.functional.leaky_relu(c1(xt), LRELU_SLOPE)
+ xt = apply_mask(xt, x_mask)
+ xt = c2(xt)
+ x = xt + x
+ return apply_mask(x, x_mask)
+
+ def remove_weight_norm(self):
+ for conv in self.convs1 + self.convs2:
+ remove_weight_norm(conv)
+
+
+class ResBlock1(ResBlockBase):
+ def __init__(self, channels, kernel_size=3, dilation=(1, 3, 5)):
+ super(ResBlock1, self).__init__(channels, kernel_size, dilation)
+
+
+class ResBlock2(ResBlockBase):
+ def __init__(self, channels, kernel_size=3, dilation=(1, 3)):
+ super(ResBlock2, self).__init__(channels, kernel_size, dilation)
+
+
+class Log(torch.nn.Module):
+ """Logarithm module for flow-based models.
+
+ This module computes the logarithm of the input and its log determinant.
+ During reverse, it computes the exponential of the input.
+ """
+
+ def forward(self, x, x_mask, reverse=False, **kwargs):
+ """Forward pass.
+
+ Args:
+ x (torch.Tensor): Input tensor.
+ x_mask (torch.Tensor): Mask tensor.
+ reverse (bool, optional): Whether to reverse the operation. Defaults to False.
+ """
+ if not reverse:
+ y = torch.log(torch.clamp_min(x, 1e-5)) * x_mask
+ logdet = torch.sum(-y, [1, 2])
+ return y, logdet
+ else:
+ x = torch.exp(x) * x_mask
+ return x
+
+
+class Flip(torch.nn.Module):
+ """Flip module for flow-based models.
+
+    This module flips the input along the channel dimension.
+ """
+
+ def forward(self, x, *args, reverse=False, **kwargs):
+ """Forward pass.
+
+ Args:
+ x (torch.Tensor): Input tensor.
+ reverse (bool, optional): Whether to reverse the operation. Defaults to False.
+ """
+ x = torch.flip(x, [1])
+ if not reverse:
+ logdet = torch.zeros(x.size(0)).to(dtype=x.dtype, device=x.device)
+ return x, logdet
+ else:
+ return x
+
+
+class ElementwiseAffine(torch.nn.Module):
+ """Elementwise affine transformation module for flow-based models.
+
+ This module performs an elementwise affine transformation on the input.
+
+ Args:
+ channels (int): Number of channels.
+
+ """
+
+ def __init__(self, channels):
+ super().__init__()
+ self.channels = channels
+ self.m = torch.nn.Parameter(torch.zeros(channels, 1))
+ self.logs = torch.nn.Parameter(torch.zeros(channels, 1))
+
+ def forward(self, x, x_mask, reverse=False, **kwargs):
+ """Forward pass.
+
+ Args:
+ x (torch.Tensor): Input tensor.
+ x_mask (torch.Tensor): Mask tensor.
+ reverse (bool, optional): Whether to reverse the operation. Defaults to False.
+ """
+ if not reverse:
+ y = self.m + torch.exp(self.logs) * x
+ y = y * x_mask
+ logdet = torch.sum(self.logs * x_mask, [1, 2])
+ return y, logdet
+ else:
+ x = (x - self.m) * torch.exp(-self.logs) * x_mask
+ return x
+
+
+class ResidualCouplingBlock(torch.nn.Module):
+ """Residual Coupling Block for normalizing flow.
+
+ Args:
+ channels (int): Number of channels in the input.
+ hidden_channels (int): Number of hidden channels in the coupling layer.
+ kernel_size (int): Kernel size of the convolutional layers.
+ dilation_rate (int): Dilation rate of the convolutional layers.
+ n_layers (int): Number of layers in the coupling layer.
+ n_flows (int, optional): Number of coupling layers in the block. Defaults to 4.
+ gin_channels (int, optional): Number of channels for the global conditioning input. Defaults to 0.
+ """
+
+ def __init__(
+ self,
+ channels,
+ hidden_channels,
+ kernel_size,
+ dilation_rate,
+ n_layers,
+ n_flows=4,
+ gin_channels=0,
+ ):
+ super(ResidualCouplingBlock, self).__init__()
+ self.channels = channels
+ self.hidden_channels = hidden_channels
+ self.kernel_size = kernel_size
+ self.dilation_rate = dilation_rate
+ self.n_layers = n_layers
+ self.n_flows = n_flows
+ self.gin_channels = gin_channels
+
+ self.flows = torch.nn.ModuleList()
+ for i in range(n_flows):
+ self.flows.append(
+ ResidualCouplingLayer(
+ channels,
+ hidden_channels,
+ kernel_size,
+ dilation_rate,
+ n_layers,
+ gin_channels=gin_channels,
+ mean_only=True,
+ )
+ )
+ self.flows.append(Flip())
+
+ def forward(
+ self,
+ x: torch.Tensor,
+ x_mask: torch.Tensor,
+ g: Optional[torch.Tensor] = None,
+ reverse: bool = False,
+ ):
+ if not reverse:
+ for flow in self.flows:
+ x, _ = flow(x, x_mask, g=g, reverse=reverse)
+ else:
+ for flow in reversed(self.flows):
+ x = flow.forward(x, x_mask, g=g, reverse=reverse)
+ return x
+
+ def remove_weight_norm(self):
+ """Removes weight normalization from the coupling layers."""
+ for i in range(self.n_flows):
+ self.flows[i * 2].remove_weight_norm()
+
+ def __prepare_scriptable__(self):
+ """Prepares the module for scripting."""
+ for i in range(self.n_flows):
+ for hook in self.flows[i * 2]._forward_pre_hooks.values():
+ if (
+ hook.__module__ == "torch.nn.utils.parametrizations.weight_norm"
+ and hook.__class__.__name__ == "WeightNorm"
+ ):
+ torch.nn.utils.remove_weight_norm(self.flows[i * 2])
+
+ return self
+
+
+class ResidualCouplingLayer(torch.nn.Module):
+ """Residual coupling layer for flow-based models.
+
+ Args:
+ channels (int): Number of channels.
+ hidden_channels (int): Number of hidden channels.
+ kernel_size (int): Size of the convolutional kernel.
+ dilation_rate (int): Dilation rate of the convolution.
+ n_layers (int): Number of convolutional layers.
+ p_dropout (float, optional): Dropout probability. Defaults to 0.
+ gin_channels (int, optional): Number of conditioning channels. Defaults to 0.
+ mean_only (bool, optional): Whether to use mean-only coupling. Defaults to False.
+ """
+
+ def __init__(
+ self,
+ channels,
+ hidden_channels,
+ kernel_size,
+ dilation_rate,
+ n_layers,
+ p_dropout=0,
+ gin_channels=0,
+ mean_only=False,
+ ):
+ assert channels % 2 == 0, "channels should be divisible by 2"
+ super().__init__()
+ self.channels = channels
+ self.hidden_channels = hidden_channels
+ self.kernel_size = kernel_size
+ self.dilation_rate = dilation_rate
+ self.n_layers = n_layers
+ self.half_channels = channels // 2
+ self.mean_only = mean_only
+
+ self.pre = torch.nn.Conv1d(self.half_channels, hidden_channels, 1)
+ self.enc = WaveNet(
+ hidden_channels,
+ kernel_size,
+ dilation_rate,
+ n_layers,
+ p_dropout=p_dropout,
+ gin_channels=gin_channels,
+ )
+ self.post = torch.nn.Conv1d(
+ hidden_channels, self.half_channels * (2 - mean_only), 1
+ )
+ self.post.weight.data.zero_()
+ self.post.bias.data.zero_()
+
+ def forward(self, x, x_mask, g=None, reverse=False):
+ """Forward pass.
+
+ Args:
+ x (torch.Tensor): Input tensor of shape (batch_size, channels, time_steps).
+ x_mask (torch.Tensor): Mask tensor of shape (batch_size, 1, time_steps).
+ g (torch.Tensor, optional): Conditioning tensor of shape (batch_size, gin_channels, time_steps).
+ Defaults to None.
+ reverse (bool, optional): Whether to reverse the operation. Defaults to False.
+ """
+ x0, x1 = torch.split(x, [self.half_channels] * 2, 1)
+ h = self.pre(x0) * x_mask
+ h = self.enc(h, x_mask, g=g)
+ stats = self.post(h) * x_mask
+ if not self.mean_only:
+ m, logs = torch.split(stats, [self.half_channels] * 2, 1)
+ else:
+ m = stats
+ logs = torch.zeros_like(m)
+
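+        # Affine coupling: x0 passes through unchanged and parameterizes the
+        # transform of x1; the reverse branch inverts it exactly.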
+ if not reverse:
+ x1 = m + x1 * torch.exp(logs) * x_mask
+ x = torch.cat([x0, x1], 1)
+ logdet = torch.sum(logs, [1, 2])
+ return x, logdet
+ else:
+ x1 = (x1 - m) * torch.exp(-logs) * x_mask
+ x = torch.cat([x0, x1], 1)
+ return x
+
+ def remove_weight_norm(self):
+ """Remove weight normalization from the module."""
+ self.enc.remove_weight_norm()
diff --git a/rvc/lib/algorithm/synthesizers.py b/rvc/lib/algorithm/synthesizers.py
new file mode 100644
index 0000000000000000000000000000000000000000..2a1aa2366b49d5124c65e4234b6948ed2ca70676
--- /dev/null
+++ b/rvc/lib/algorithm/synthesizers.py
@@ -0,0 +1,237 @@
+import torch
+from typing import Optional
+
+from rvc.lib.algorithm.nsf import GeneratorNSF
+from rvc.lib.algorithm.generators import Generator
+from rvc.lib.algorithm.commons import slice_segments, rand_slice_segments
+from rvc.lib.algorithm.residuals import ResidualCouplingBlock
+from rvc.lib.algorithm.encoders import TextEncoder, PosteriorEncoder
+
+
+class Synthesizer(torch.nn.Module):
+ """
+ Base Synthesizer model.
+
+ Args:
+ spec_channels (int): Number of channels in the spectrogram.
+ segment_size (int): Size of the audio segment.
+ inter_channels (int): Number of channels in the intermediate layers.
+ hidden_channels (int): Number of channels in the hidden layers.
+ filter_channels (int): Number of channels in the filter layers.
+ n_heads (int): Number of attention heads.
+ n_layers (int): Number of layers in the encoder.
+ kernel_size (int): Size of the convolution kernel.
+ p_dropout (float): Dropout probability.
+ resblock (str): Type of residual block.
+ resblock_kernel_sizes (list): Kernel sizes for the residual blocks.
+ resblock_dilation_sizes (list): Dilation sizes for the residual blocks.
+ upsample_rates (list): Upsampling rates for the decoder.
+ upsample_initial_channel (int): Number of channels in the initial upsampling layer.
+ upsample_kernel_sizes (list): Kernel sizes for the upsampling layers.
+ spk_embed_dim (int): Dimension of the speaker embedding.
+ gin_channels (int): Number of channels in the global conditioning vector.
+ sr (int): Sampling rate of the audio.
+ use_f0 (bool): Whether to use F0 information.
+ text_enc_hidden_dim (int): Hidden dimension for the text encoder.
+ kwargs: Additional keyword arguments.
+ """
+
+ def __init__(
+ self,
+ spec_channels,
+ segment_size,
+ inter_channels,
+ hidden_channels,
+ filter_channels,
+ n_heads,
+ n_layers,
+ kernel_size,
+ p_dropout,
+ resblock,
+ resblock_kernel_sizes,
+ resblock_dilation_sizes,
+ upsample_rates,
+ upsample_initial_channel,
+ upsample_kernel_sizes,
+ spk_embed_dim,
+ gin_channels,
+ sr,
+ use_f0,
+ text_enc_hidden_dim=768,
+ **kwargs
+ ):
+ super(Synthesizer, self).__init__()
+ self.spec_channels = spec_channels
+ self.inter_channels = inter_channels
+ self.hidden_channels = hidden_channels
+ self.filter_channels = filter_channels
+ self.n_heads = n_heads
+ self.n_layers = n_layers
+ self.kernel_size = kernel_size
+ self.p_dropout = float(p_dropout)
+ self.resblock = resblock
+ self.resblock_kernel_sizes = resblock_kernel_sizes
+ self.resblock_dilation_sizes = resblock_dilation_sizes
+ self.upsample_rates = upsample_rates
+ self.upsample_initial_channel = upsample_initial_channel
+ self.upsample_kernel_sizes = upsample_kernel_sizes
+ self.segment_size = segment_size
+ self.gin_channels = gin_channels
+ self.spk_embed_dim = spk_embed_dim
+ self.use_f0 = use_f0
+
+ self.enc_p = TextEncoder(
+ inter_channels,
+ hidden_channels,
+ filter_channels,
+ n_heads,
+ n_layers,
+ kernel_size,
+ float(p_dropout),
+ text_enc_hidden_dim,
+ f0=use_f0,
+ )
+
+ if use_f0:
+ self.dec = GeneratorNSF(
+ inter_channels,
+ resblock,
+ resblock_kernel_sizes,
+ resblock_dilation_sizes,
+ upsample_rates,
+ upsample_initial_channel,
+ upsample_kernel_sizes,
+ gin_channels=gin_channels,
+ sr=sr,
+ is_half=kwargs["is_half"],
+ )
+ else:
+ self.dec = Generator(
+ inter_channels,
+ resblock,
+ resblock_kernel_sizes,
+ resblock_dilation_sizes,
+ upsample_rates,
+ upsample_initial_channel,
+ upsample_kernel_sizes,
+ gin_channels=gin_channels,
+ )
+
+ self.enc_q = PosteriorEncoder(
+ spec_channels,
+ inter_channels,
+ hidden_channels,
+ 5,
+ 1,
+ 16,
+ gin_channels=gin_channels,
+ )
+ self.flow = ResidualCouplingBlock(
+ inter_channels, hidden_channels, 5, 1, 3, gin_channels=gin_channels
+ )
+ self.emb_g = torch.nn.Embedding(self.spk_embed_dim, gin_channels)
+
+ def remove_weight_norm(self):
+ """Removes weight normalization from the model."""
+ self.dec.remove_weight_norm()
+ self.flow.remove_weight_norm()
+ self.enc_q.remove_weight_norm()
+
+ def __prepare_scriptable__(self):
+ for hook in self.dec._forward_pre_hooks.values():
+ if (
+ hook.__module__ == "torch.nn.utils.parametrizations.weight_norm"
+ and hook.__class__.__name__ == "WeightNorm"
+ ):
+ torch.nn.utils.remove_weight_norm(self.dec)
+ for hook in self.flow._forward_pre_hooks.values():
+ if (
+ hook.__module__ == "torch.nn.utils.parametrizations.weight_norm"
+ and hook.__class__.__name__ == "WeightNorm"
+ ):
+ torch.nn.utils.remove_weight_norm(self.flow)
+ if hasattr(self, "enc_q"):
+ for hook in self.enc_q._forward_pre_hooks.values():
+ if (
+ hook.__module__ == "torch.nn.utils.parametrizations.weight_norm"
+ and hook.__class__.__name__ == "WeightNorm"
+ ):
+ torch.nn.utils.remove_weight_norm(self.enc_q)
+ return self
+
+ @torch.jit.ignore
+ def forward(
+ self,
+ phone: torch.Tensor,
+ phone_lengths: torch.Tensor,
+ pitch: Optional[torch.Tensor] = None,
+ pitchf: Optional[torch.Tensor] = None,
+        y: Optional[torch.Tensor] = None,
+        y_lengths: Optional[torch.Tensor] = None,
+ ds: Optional[torch.Tensor] = None,
+ ):
+ """
+ Forward pass of the model.
+
+ Args:
+ phone (torch.Tensor): Phoneme sequence.
+ phone_lengths (torch.Tensor): Lengths of the phoneme sequences.
+ pitch (torch.Tensor, optional): Pitch sequence.
+ pitchf (torch.Tensor, optional): Fine-grained pitch sequence.
+ y (torch.Tensor, optional): Target spectrogram.
+ y_lengths (torch.Tensor, optional): Lengths of the target spectrograms.
+ ds (torch.Tensor, optional): Speaker embedding. Defaults to None.
+ """
+ g = self.emb_g(ds).unsqueeze(-1)
+ m_p, logs_p, x_mask = self.enc_p(phone, pitch, phone_lengths)
+ if y is not None:
+ z, m_q, logs_q, y_mask = self.enc_q(y, y_lengths, g=g)
+ z_p = self.flow(z, y_mask, g=g)
+ z_slice, ids_slice = rand_slice_segments(z, y_lengths, self.segment_size)
+ if self.use_f0:
+ pitchf = slice_segments(pitchf, ids_slice, self.segment_size, 2)
+ o = self.dec(z_slice, pitchf, g=g)
+ else:
+ o = self.dec(z_slice, g=g)
+ return o, ids_slice, x_mask, y_mask, (z, z_p, m_p, logs_p, m_q, logs_q)
+ else:
+ return None, None, x_mask, None, (None, None, m_p, logs_p, None, None)
+
+ @torch.jit.export
+ def infer(
+ self,
+ phone: torch.Tensor,
+ phone_lengths: torch.Tensor,
+ pitch: Optional[torch.Tensor] = None,
+ nsff0: Optional[torch.Tensor] = None,
+        sid: Optional[torch.Tensor] = None,
+ rate: Optional[torch.Tensor] = None,
+ ):
+ """
+ Inference of the model.
+
+ Args:
+ phone (torch.Tensor): Phoneme sequence.
+ phone_lengths (torch.Tensor): Lengths of the phoneme sequences.
+ pitch (torch.Tensor, optional): Pitch sequence.
+ nsff0 (torch.Tensor, optional): Fine-grained pitch sequence.
+ sid (torch.Tensor): Speaker embedding.
+ rate (torch.Tensor, optional): Rate for time-stretching. Defaults to None.
+ """
+ g = self.emb_g(sid).unsqueeze(-1)
+ m_p, logs_p, x_mask = self.enc_p(phone, pitch, phone_lengths)
+ z_p = (m_p + torch.exp(logs_p) * torch.randn_like(m_p) * 0.66666) * x_mask
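+        # Optional time trimming: keep only the trailing `rate` fraction of the
+        # latent (and pitch), so just the tail of the utterance is decoded.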
+ if rate is not None:
+ assert isinstance(rate, torch.Tensor)
+ head = int(z_p.shape[2] * (1.0 - rate.item()))
+ z_p = z_p[:, :, head:]
+ x_mask = x_mask[:, :, head:]
+ if self.use_f0:
+ nsff0 = nsff0[:, head:]
+ if self.use_f0:
+ z = self.flow(z_p, x_mask, g=g, reverse=True)
+ o = self.dec(z * x_mask, nsff0, g=g)
+ else:
+ z = self.flow(z_p, x_mask, g=g, reverse=True)
+ o = self.dec(z * x_mask, g=g)
+ return o, x_mask, (z, z_p, m_p, logs_p)
diff --git a/rvc/lib/predictors/F0Extractor.py b/rvc/lib/predictors/F0Extractor.py
new file mode 100644
index 0000000000000000000000000000000000000000..d5177c4874a5aeeacad76c6f1e8c87fb448fbd40
--- /dev/null
+++ b/rvc/lib/predictors/F0Extractor.py
@@ -0,0 +1,105 @@
+import dataclasses
+import pathlib
+import libf0
+import librosa
+import numpy as np
+import resampy
+import torch
+import torchcrepe
+import torchfcpe
+import os
+
+# from tools.anyf0.rmvpe import RMVPE
+from rvc.lib.predictors.RMVPE import RMVPE0Predictor
+from rvc.configs.config import Config
+
+config = Config()
+
+
+@dataclasses.dataclass
+class F0Extractor:
+ wav_path: pathlib.Path
+ sample_rate: int = 44100
+ hop_length: int = 512
+ f0_min: int = 50
+ f0_max: int = 1600
+ method: str = "rmvpe"
+ x: np.ndarray = dataclasses.field(init=False)
+
+ def __post_init__(self):
+ self.x, self.sample_rate = librosa.load(self.wav_path, sr=self.sample_rate)
+
+ @property
+ def hop_size(self) -> float:
+ return self.hop_length / self.sample_rate
+
+ @property
+ def wav16k(self) -> np.ndarray:
+ return resampy.resample(self.x, self.sample_rate, 16000)
+
+ def extract_f0(self) -> np.ndarray:
+ f0 = None
+ method = self.method
+ # Fall back to CPU for ZLUDA as these methods use CUcFFT
+ device = (
+ "cpu"
+ if "cuda" in config.device
+ and torch.cuda.get_device_name().endswith("[ZLUDA]")
+ else config.device
+ )
+
+ if method == "crepe":
+ wav16k_torch = torch.FloatTensor(self.wav16k).unsqueeze(0).to(device)
+ f0 = torchcrepe.predict(
+ wav16k_torch,
+ sample_rate=16000,
+ hop_length=160,
+ batch_size=512,
+ fmin=self.f0_min,
+ fmax=self.f0_max,
+ device=device,
+ )
+ f0 = f0[0].cpu().numpy()
+ elif method == "fcpe":
+ audio = librosa.to_mono(self.x)
+ audio_length = len(audio)
+ f0_target_length = (audio_length // self.hop_length) + 1
+ audio = (
+ torch.from_numpy(audio).float().unsqueeze(0).unsqueeze(-1).to(device)
+ )
+ model = torchfcpe.spawn_bundled_infer_model(device=device)
+
+ f0 = model.infer(
+ audio,
+ sr=self.sample_rate,
+ decoder_mode="local_argmax",
+ threshold=0.006,
+ f0_min=self.f0_min,
+ f0_max=self.f0_max,
+ interp_uv=False,
+ output_interp_target_length=f0_target_length,
+ )
+ f0 = f0.squeeze().cpu().numpy()
+ elif method == "rmvpe":
+ is_half = False if device == "cpu" else config.is_half
+ model_rmvpe = RMVPE0Predictor(
+ os.path.join("rvc", "models", "predictors", "rmvpe.pt"),
+ is_half=is_half,
+ device=device,
+ # hop_length=80
+ )
+ f0 = model_rmvpe.infer_from_audio(self.wav16k, thred=0.03)
+
+ else:
+ raise ValueError(f"Unknown method: {self.method}")
+ return libf0.hz_to_cents(f0, librosa.midi_to_hz(0))
+
+ def plot_f0(self, f0):
+ from matplotlib import pyplot as plt
+
+ plt.figure(figsize=(10, 4))
+ plt.plot(f0)
+ plt.title(self.method)
+ plt.xlabel("Time (frames)")
+ plt.ylabel("F0 (cents)")
+ plt.show()
diff --git a/rvc/lib/predictors/FCPE.py b/rvc/lib/predictors/FCPE.py
new file mode 100644
index 0000000000000000000000000000000000000000..12f6c346aa5d448a2133400a09e103043b5863c8
--- /dev/null
+++ b/rvc/lib/predictors/FCPE.py
@@ -0,0 +1,920 @@
+from typing import Union
+
+import torch.nn.functional as F
+import numpy as np
+import torch
+import torch.nn as nn
+from torch.nn.utils.parametrizations import weight_norm
+from torchaudio.transforms import Resample
+import os
+import librosa
+import soundfile as sf
+import torch.utils.data
+from librosa.filters import mel as librosa_mel_fn
+import math
+from functools import partial
+
+from einops import rearrange, repeat
+from local_attention import LocalAttention
+
+os.environ["LRU_CACHE_CAPACITY"] = "3"
+
+
+def load_wav_to_torch(full_path, target_sr=None, return_empty_on_exception=False):
+ """Loads wav file to torch tensor."""
+ try:
+ data, sample_rate = sf.read(full_path, always_2d=True)
+ except Exception as error:
+ print(f"An error occurred loading {full_path}: {error}")
+ if return_empty_on_exception:
+            return [], target_sr or 48000
+ else:
+ raise
+
+ data = data[:, 0] if len(data.shape) > 1 else data
+ assert len(data) > 2
+
+ # Normalize data
+ max_mag = (
+ -np.iinfo(data.dtype).min
+ if np.issubdtype(data.dtype, np.integer)
+ else max(np.amax(data), -np.amin(data))
+ )
+ max_mag = (
+ (2**31) + 1 if max_mag > (2**15) else ((2**15) + 1 if max_mag > 1.01 else 1.0)
+ )
+ data = torch.FloatTensor(data.astype(np.float32)) / max_mag
+
+ # Handle exceptions and resample
+ if (torch.isinf(data) | torch.isnan(data)).any() and return_empty_on_exception:
+ return [], sample_rate or target_sr or 48000
+ if target_sr is not None and sample_rate != target_sr:
+ data = torch.from_numpy(
+ librosa.core.resample(
+ data.numpy(), orig_sr=sample_rate, target_sr=target_sr
+ )
+ )
+ sample_rate = target_sr
+
+ return data, sample_rate
+
+
+def dynamic_range_compression(x, C=1, clip_val=1e-5):
+ return np.log(np.clip(x, a_min=clip_val, a_max=None) * C)
+
+
+def dynamic_range_decompression(x, C=1):
+ return np.exp(x) / C
+
+
+def dynamic_range_compression_torch(x, C=1, clip_val=1e-5):
+ return torch.log(torch.clamp(x, min=clip_val) * C)
+
+
+def dynamic_range_decompression_torch(x, C=1):
+ return torch.exp(x) / C
+
+
+class STFT:
+ def __init__(
+ self,
+ sr=22050,
+ n_mels=80,
+ n_fft=1024,
+ win_size=1024,
+ hop_length=256,
+ fmin=20,
+ fmax=11025,
+ clip_val=1e-5,
+ ):
+ self.target_sr = sr
+ self.n_mels = n_mels
+ self.n_fft = n_fft
+ self.win_size = win_size
+ self.hop_length = hop_length
+ self.fmin = fmin
+ self.fmax = fmax
+ self.clip_val = clip_val
+ self.mel_basis = {}
+ self.hann_window = {}
+
+ def get_mel(self, y, keyshift=0, speed=1, center=False, train=False):
+ sample_rate = self.target_sr
+ n_mels = self.n_mels
+ n_fft = self.n_fft
+ win_size = self.win_size
+ hop_length = self.hop_length
+ fmin = self.fmin
+ fmax = self.fmax
+ clip_val = self.clip_val
+
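+        # A keyshift of k semitones rescales the FFT and window sizes by
+        # 2^(k/12); magnitudes are compensated after the STFT.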
+ factor = 2 ** (keyshift / 12)
+ n_fft_new = int(np.round(n_fft * factor))
+ win_size_new = int(np.round(win_size * factor))
+ hop_length_new = int(np.round(hop_length * speed))
+
+ # Optimize mel_basis and hann_window caching
+ mel_basis = self.mel_basis if not train else {}
+ hann_window = self.hann_window if not train else {}
+
+ mel_basis_key = str(fmax) + "_" + str(y.device)
+ if mel_basis_key not in mel_basis:
+ mel = librosa_mel_fn(
+ sr=sample_rate, n_fft=n_fft, n_mels=n_mels, fmin=fmin, fmax=fmax
+ )
+ mel_basis[mel_basis_key] = torch.from_numpy(mel).float().to(y.device)
+
+ keyshift_key = str(keyshift) + "_" + str(y.device)
+ if keyshift_key not in hann_window:
+ hann_window[keyshift_key] = torch.hann_window(win_size_new).to(y.device)
+
+ # Padding and STFT
+ pad_left = (win_size_new - hop_length_new) // 2
+ pad_right = max(
+ (win_size_new - hop_length_new + 1) // 2,
+ win_size_new - y.size(-1) - pad_left,
+ )
+ mode = "reflect" if pad_right < y.size(-1) else "constant"
+ y = torch.nn.functional.pad(y.unsqueeze(1), (pad_left, pad_right), mode=mode)
+ y = y.squeeze(1)
+
+ spec = torch.stft(
+ y,
+ n_fft_new,
+ hop_length=hop_length_new,
+ win_length=win_size_new,
+ window=hann_window[keyshift_key],
+ center=center,
+ pad_mode="reflect",
+ normalized=False,
+ onesided=True,
+ return_complex=True,
+ )
+ spec = torch.sqrt(spec.real.pow(2) + spec.imag.pow(2) + (1e-9))
+
+ # Handle keyshift and mel conversion
+ if keyshift != 0:
+ size = n_fft // 2 + 1
+ resize = spec.size(1)
+ spec = (
+ F.pad(spec, (0, 0, 0, size - resize))
+ if resize < size
+ else spec[:, :size, :]
+ )
+ spec = spec * win_size / win_size_new
+ spec = torch.matmul(mel_basis[mel_basis_key], spec)
+ spec = dynamic_range_compression_torch(spec, clip_val=clip_val)
+ return spec
+
+ def __call__(self, audiopath):
+ audio, sr = load_wav_to_torch(audiopath, target_sr=self.target_sr)
+ spect = self.get_mel(audio.unsqueeze(0)).squeeze(0)
+ return spect
+
+
+stft = STFT()
+
+
+def softmax_kernel(
+ data, *, projection_matrix, is_query, normalize_data=True, eps=1e-4, device=None
+):
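+    # Performer-style (FAVOR+) positive random-feature approximation of the
+    # softmax kernel: q and k are projected with a shared Gaussian matrix so
+    # attention can later be computed in linear time.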
+ b, h, *_ = data.shape
+
+ # Normalize data
+ data_normalizer = (data.shape[-1] ** -0.25) if normalize_data else 1.0
+
+ # Project data
+ ratio = projection_matrix.shape[0] ** -0.5
+ projection = repeat(projection_matrix, "j d -> b h j d", b=b, h=h)
+ projection = projection.type_as(data)
+ data_dash = torch.einsum("...id,...jd->...ij", (data_normalizer * data), projection)
+
+ # Calculate diagonal data
+ diag_data = data**2
+ diag_data = torch.sum(diag_data, dim=-1)
+ diag_data = (diag_data / 2.0) * (data_normalizer**2)
+ diag_data = diag_data.unsqueeze(dim=-1)
+
+ # Apply softmax
+ if is_query:
+ data_dash = ratio * (
+ torch.exp(
+ data_dash
+ - diag_data
+ - torch.max(data_dash, dim=-1, keepdim=True).values
+ )
+ + eps
+ )
+ else:
+ data_dash = ratio * (torch.exp(data_dash - diag_data + eps))
+
+ return data_dash.type_as(data)
+
+
+def orthogonal_matrix_chunk(cols, qr_uniform_q=False, device=None):
+ unstructured_block = torch.randn((cols, cols), device=device)
+ q, r = torch.linalg.qr(unstructured_block.cpu(), mode="reduced")
+ q, r = map(lambda t: t.to(device), (q, r))
+
+ if qr_uniform_q:
+ d = torch.diag(r, 0)
+ q *= d.sign()
+ return q.t()
+
+
+def exists(val):
+ return val is not None
+
+
+def empty(tensor):
+ return tensor.numel() == 0
+
+
+def default(val, d):
+ return val if exists(val) else d
+
+
+def cast_tuple(val):
+ return (val,) if not isinstance(val, tuple) else val
+
+
+class PCmer(nn.Module):
+ def __init__(
+ self,
+ num_layers,
+ num_heads,
+ dim_model,
+ dim_keys,
+ dim_values,
+ residual_dropout,
+ attention_dropout,
+ ):
+ super().__init__()
+ self.num_layers = num_layers
+ self.num_heads = num_heads
+ self.dim_model = dim_model
+ self.dim_values = dim_values
+ self.dim_keys = dim_keys
+ self.residual_dropout = residual_dropout
+ self.attention_dropout = attention_dropout
+
+ self._layers = nn.ModuleList([_EncoderLayer(self) for _ in range(num_layers)])
+
+ def forward(self, phone, mask=None):
+ for layer in self._layers:
+ phone = layer(phone, mask)
+ return phone
+
+
+class _EncoderLayer(nn.Module):
+ def __init__(self, parent: PCmer):
+ super().__init__()
+ self.conformer = ConformerConvModule(parent.dim_model)
+ self.norm = nn.LayerNorm(parent.dim_model)
+ self.dropout = nn.Dropout(parent.residual_dropout)
+ self.attn = SelfAttention(
+ dim=parent.dim_model, heads=parent.num_heads, causal=False
+ )
+
+ def forward(self, phone, mask=None):
+ phone = phone + (self.attn(self.norm(phone), mask=mask))
+ phone = phone + (self.conformer(phone))
+ return phone
+
+
+def calc_same_padding(kernel_size):
+ pad = kernel_size // 2
+ return (pad, pad - (kernel_size + 1) % 2)
+
+
+class Swish(nn.Module):
+ def forward(self, x):
+ return x * x.sigmoid()
+
+
+class Transpose(nn.Module):
+ def __init__(self, dims):
+ super().__init__()
+ assert len(dims) == 2, "dims must be a tuple of two dimensions"
+ self.dims = dims
+
+ def forward(self, x):
+ return x.transpose(*self.dims)
+
+
+class GLU(nn.Module):
+ def __init__(self, dim):
+ super().__init__()
+ self.dim = dim
+
+ def forward(self, x):
+ out, gate = x.chunk(2, dim=self.dim)
+ return out * gate.sigmoid()
+
+
+class DepthWiseConv1d(nn.Module):
+ def __init__(self, chan_in, chan_out, kernel_size, padding):
+ super().__init__()
+ self.padding = padding
+ self.conv = nn.Conv1d(chan_in, chan_out, kernel_size, groups=chan_in)
+
+ def forward(self, x):
+ x = F.pad(x, self.padding)
+ return self.conv(x)
+
+
+class ConformerConvModule(nn.Module):
+ def __init__(
+ self, dim, causal=False, expansion_factor=2, kernel_size=31, dropout=0.0
+ ):
+ super().__init__()
+
+ inner_dim = dim * expansion_factor
+ padding = calc_same_padding(kernel_size) if not causal else (kernel_size - 1, 0)
+
+ self.net = nn.Sequential(
+ nn.LayerNorm(dim),
+ Transpose((1, 2)),
+ nn.Conv1d(dim, inner_dim * 2, 1),
+ GLU(dim=1),
+ DepthWiseConv1d(
+ inner_dim, inner_dim, kernel_size=kernel_size, padding=padding
+ ),
+ Swish(),
+ nn.Conv1d(inner_dim, dim, 1),
+ Transpose((1, 2)),
+ nn.Dropout(dropout),
+ )
+
+ def forward(self, x):
+ return self.net(x)
+
+
+def linear_attention(q, k, v):
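+    # Kernelized attention in O(N): associativity lets us form k^T v first and
+    # normalize queries by D_inv instead of materializing the N x N matrix.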
+ if v is None:
+ out = torch.einsum("...ed,...nd->...ne", k, q)
+ return out
+ else:
+ k_cumsum = k.sum(dim=-2)
+ D_inv = 1.0 / (torch.einsum("...nd,...d->...n", q, k_cumsum.type_as(q)) + 1e-8)
+ context = torch.einsum("...nd,...ne->...de", k, v)
+ out = torch.einsum("...de,...nd,...n->...ne", context, q, D_inv)
+ return out
+
+
+def gaussian_orthogonal_random_matrix(
+ nb_rows, nb_columns, scaling=0, qr_uniform_q=False, device=None
+):
+ nb_full_blocks = int(nb_rows / nb_columns)
+ block_list = []
+
+ for _ in range(nb_full_blocks):
+ q = orthogonal_matrix_chunk(
+ nb_columns, qr_uniform_q=qr_uniform_q, device=device
+ )
+ block_list.append(q)
+
+ remaining_rows = nb_rows - nb_full_blocks * nb_columns
+ if remaining_rows > 0:
+ q = orthogonal_matrix_chunk(
+ nb_columns, qr_uniform_q=qr_uniform_q, device=device
+ )
+ block_list.append(q[:remaining_rows])
+
+ final_matrix = torch.cat(block_list)
+
+ if scaling == 0:
+ multiplier = torch.randn((nb_rows, nb_columns), device=device).norm(dim=1)
+ elif scaling == 1:
+ multiplier = math.sqrt((float(nb_columns))) * torch.ones(
+ (nb_rows,), device=device
+ )
+ else:
+ raise ValueError(f"Invalid scaling {scaling}")
+
+ return torch.diag(multiplier) @ final_matrix
+
+
+class FastAttention(nn.Module):
+ def __init__(
+ self,
+ dim_heads,
+ nb_features=None,
+ ortho_scaling=0,
+ causal=False,
+ generalized_attention=False,
+ kernel_fn=nn.ReLU(),
+ qr_uniform_q=False,
+ no_projection=False,
+ ):
+ super().__init__()
+ nb_features = default(nb_features, int(dim_heads * math.log(dim_heads)))
+
+ self.dim_heads = dim_heads
+ self.nb_features = nb_features
+ self.ortho_scaling = ortho_scaling
+
+ self.create_projection = partial(
+ gaussian_orthogonal_random_matrix,
+ nb_rows=self.nb_features,
+ nb_columns=dim_heads,
+ scaling=ortho_scaling,
+ qr_uniform_q=qr_uniform_q,
+ )
+ projection_matrix = self.create_projection()
+ self.register_buffer("projection_matrix", projection_matrix)
+
+ self.generalized_attention = generalized_attention
+ self.kernel_fn = kernel_fn
+ self.no_projection = no_projection
+ self.causal = causal
+
+ @torch.no_grad()
+ def redraw_projection_matrix(self):
+ projections = self.create_projection()
+ self.projection_matrix.copy_(projections)
+ del projections
+
+ def forward(self, q, k, v):
+ device = q.device
+
+ if self.no_projection:
+ q = q.softmax(dim=-1)
+ k = torch.exp(k) if self.causal else k.softmax(dim=-2)
+ else:
+ create_kernel = partial(
+ softmax_kernel, projection_matrix=self.projection_matrix, device=device
+ )
+ q = create_kernel(q, is_query=True)
+ k = create_kernel(k, is_query=False)
+
+        if self.causal:
+            raise NotImplementedError("Causal linear attention is not implemented.")
+        attn_fn = linear_attention
+
+ if v is None:
+ out = attn_fn(q, k, None)
+ return out
+ else:
+ out = attn_fn(q, k, v)
+ return out
+
+
+class SelfAttention(nn.Module):
+ def __init__(
+ self,
+ dim,
+ causal=False,
+ heads=8,
+ dim_head=64,
+ local_heads=0,
+ local_window_size=256,
+ nb_features=None,
+ feature_redraw_interval=1000,
+ generalized_attention=False,
+ kernel_fn=nn.ReLU(),
+ qr_uniform_q=False,
+ dropout=0.0,
+ no_projection=False,
+ ):
+ super().__init__()
+ assert dim % heads == 0, "dimension must be divisible by number of heads"
+ dim_head = default(dim_head, dim // heads)
+ inner_dim = dim_head * heads
+ self.fast_attention = FastAttention(
+ dim_head,
+ nb_features,
+ causal=causal,
+ generalized_attention=generalized_attention,
+ kernel_fn=kernel_fn,
+ qr_uniform_q=qr_uniform_q,
+ no_projection=no_projection,
+ )
+
+ self.heads = heads
+ self.global_heads = heads - local_heads
+ self.local_attn = (
+ LocalAttention(
+ window_size=local_window_size,
+ causal=causal,
+ autopad=True,
+ dropout=dropout,
+ look_forward=int(not causal),
+ rel_pos_emb_config=(dim_head, local_heads),
+ )
+ if local_heads > 0
+ else None
+ )
+
+ self.to_q = nn.Linear(dim, inner_dim)
+ self.to_k = nn.Linear(dim, inner_dim)
+ self.to_v = nn.Linear(dim, inner_dim)
+ self.to_out = nn.Linear(inner_dim, dim)
+ self.dropout = nn.Dropout(dropout)
+
+ @torch.no_grad()
+ def redraw_projection_matrix(self):
+ self.fast_attention.redraw_projection_matrix()
+
+ def forward(
+ self,
+ x,
+ context=None,
+ mask=None,
+ context_mask=None,
+ name=None,
+ inference=False,
+ **kwargs,
+ ):
+ _, _, _, h, gh = *x.shape, self.heads, self.global_heads
+
+ cross_attend = exists(context)
+ context = default(context, x)
+ context_mask = default(context_mask, mask) if not cross_attend else context_mask
+ q, k, v = self.to_q(x), self.to_k(context), self.to_v(context)
+
+ q, k, v = map(lambda t: rearrange(t, "b n (h d) -> b h n d", h=h), (q, k, v))
+ (q, lq), (k, lk), (v, lv) = map(lambda t: (t[:, :gh], t[:, gh:]), (q, k, v))
+
+ attn_outs = []
+ if not empty(q):
+ if exists(context_mask):
+ global_mask = context_mask[:, None, :, None]
+ v.masked_fill_(~global_mask, 0.0)
+ if cross_attend:
+ pass # TODO: Implement cross-attention
+ else:
+ out = self.fast_attention(q, k, v)
+ attn_outs.append(out)
+
+ if not empty(lq):
+ assert (
+ not cross_attend
+ ), "local attention is not compatible with cross attention"
+ out = self.local_attn(lq, lk, lv, input_mask=mask)
+ attn_outs.append(out)
+
+ out = torch.cat(attn_outs, dim=1)
+ out = rearrange(out, "b h n d -> b n (h d)")
+ out = self.to_out(out)
+ return self.dropout(out)
+
+
+def l2_regularization(model, l2_alpha):
+ l2_loss = []
+ for module in model.modules():
+ if type(module) is nn.Conv2d:
+ l2_loss.append((module.weight**2).sum() / 2.0)
+ return l2_alpha * sum(l2_loss)
+
+
+class FCPE(nn.Module):
+ def __init__(
+ self,
+ input_channel=128,
+ out_dims=360,
+ n_layers=12,
+ n_chans=512,
+ use_siren=False,
+ use_full=False,
+ loss_mse_scale=10,
+ loss_l2_regularization=False,
+ loss_l2_regularization_scale=1,
+ loss_grad1_mse=False,
+ loss_grad1_mse_scale=1,
+ f0_max=1975.5,
+ f0_min=32.70,
+ confidence=False,
+ threshold=0.05,
+ use_input_conv=True,
+ ):
+ super().__init__()
+ if use_siren is True:
+ raise ValueError("Siren is not supported yet.")
+ if use_full is True:
+ raise ValueError("Full model is not supported yet.")
+
+ self.loss_mse_scale = loss_mse_scale if (loss_mse_scale is not None) else 10
+ self.loss_l2_regularization = (
+ loss_l2_regularization if (loss_l2_regularization is not None) else False
+ )
+ self.loss_l2_regularization_scale = (
+ loss_l2_regularization_scale
+ if (loss_l2_regularization_scale is not None)
+ else 1
+ )
+ self.loss_grad1_mse = loss_grad1_mse if (loss_grad1_mse is not None) else False
+ self.loss_grad1_mse_scale = (
+ loss_grad1_mse_scale if (loss_grad1_mse_scale is not None) else 1
+ )
+ self.f0_max = f0_max if (f0_max is not None) else 1975.5
+ self.f0_min = f0_min if (f0_min is not None) else 32.70
+ self.confidence = confidence if (confidence is not None) else False
+ self.threshold = threshold if (threshold is not None) else 0.05
+ self.use_input_conv = use_input_conv if (use_input_conv is not None) else True
+
+ self.cent_table_b = torch.Tensor(
+ np.linspace(
+ self.f0_to_cent(torch.Tensor([f0_min]))[0],
+ self.f0_to_cent(torch.Tensor([f0_max]))[0],
+ out_dims,
+ )
+ )
+ self.register_buffer("cent_table", self.cent_table_b)
+
+ # conv in stack
+ _leaky = nn.LeakyReLU()
+ self.stack = nn.Sequential(
+ nn.Conv1d(input_channel, n_chans, 3, 1, 1),
+ nn.GroupNorm(4, n_chans),
+ _leaky,
+ nn.Conv1d(n_chans, n_chans, 3, 1, 1),
+ )
+
+ # transformer
+ self.decoder = PCmer(
+ num_layers=n_layers,
+ num_heads=8,
+ dim_model=n_chans,
+ dim_keys=n_chans,
+ dim_values=n_chans,
+ residual_dropout=0.1,
+ attention_dropout=0.1,
+ )
+ self.norm = nn.LayerNorm(n_chans)
+
+ # out
+ self.n_out = out_dims
+ self.dense_out = weight_norm(nn.Linear(n_chans, self.n_out))
+
+ def forward(
+ self, mel, infer=True, gt_f0=None, return_hz_f0=False, cdecoder="local_argmax"
+ ):
+ if cdecoder == "argmax":
+ self.cdecoder = self.cents_decoder
+ elif cdecoder == "local_argmax":
+ self.cdecoder = self.cents_local_decoder
+
+ x = (
+ self.stack(mel.transpose(1, 2)).transpose(1, 2)
+ if self.use_input_conv
+ else mel
+ )
+ x = self.decoder(x)
+ x = self.norm(x)
+ x = self.dense_out(x)
+ x = torch.sigmoid(x)
+
+ if not infer:
+ gt_cent_f0 = self.f0_to_cent(gt_f0)
+ gt_cent_f0 = self.gaussian_blurred_cent(gt_cent_f0)
+ loss_all = self.loss_mse_scale * F.binary_cross_entropy(x, gt_cent_f0)
+ if self.loss_l2_regularization:
+ loss_all = loss_all + l2_regularization(
+ model=self, l2_alpha=self.loss_l2_regularization_scale
+ )
+ x = loss_all
+ if infer:
+ x = self.cdecoder(x)
+ x = self.cent_to_f0(x)
+ x = (1 + x / 700).log() if not return_hz_f0 else x
+
+ return x
+
+ def cents_decoder(self, y, mask=True):
+ B, N, _ = y.size()
+ ci = self.cent_table[None, None, :].expand(B, N, -1)
+ rtn = torch.sum(ci * y, dim=-1, keepdim=True) / torch.sum(
+ y, dim=-1, keepdim=True
+ )
+ if mask:
+ confident = torch.max(y, dim=-1, keepdim=True)[0]
+ confident_mask = torch.ones_like(confident)
+ confident_mask[confident <= self.threshold] = float("-INF")
+ rtn = rtn * confident_mask
+ return (rtn, confident) if self.confidence else rtn
+
+ def cents_local_decoder(self, y, mask=True):
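+        # Weighted average of cent values over a 9-bin window centered on the
+        # argmax bin; frames whose peak confidence falls below the threshold
+        # are invalidated via a -inf mask.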
+ B, N, _ = y.size()
+ ci = self.cent_table[None, None, :].expand(B, N, -1)
+ confident, max_index = torch.max(y, dim=-1, keepdim=True)
+ local_argmax_index = torch.arange(0, 9).to(max_index.device) + (max_index - 4)
+ local_argmax_index = torch.clamp(local_argmax_index, 0, self.n_out - 1)
+ ci_l = torch.gather(ci, -1, local_argmax_index)
+ y_l = torch.gather(y, -1, local_argmax_index)
+ rtn = torch.sum(ci_l * y_l, dim=-1, keepdim=True) / torch.sum(
+ y_l, dim=-1, keepdim=True
+ )
+ if mask:
+ confident_mask = torch.ones_like(confident)
+ confident_mask[confident <= self.threshold] = float("-INF")
+ rtn = rtn * confident_mask
+ return (rtn, confident) if self.confidence else rtn
+
+ def cent_to_f0(self, cent):
+ return 10.0 * 2 ** (cent / 1200.0)
+
+ def f0_to_cent(self, f0):
+ return 1200.0 * torch.log2(f0 / 10.0)
+
+ def gaussian_blurred_cent(self, cents):
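+        # Turn ground-truth cent values into Gaussian soft targets over the
+        # cent table, zeroing frames outside the valid F0 range.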
+ mask = (cents > 0.1) & (cents < (1200.0 * np.log2(self.f0_max / 10.0)))
+ B, N, _ = cents.size()
+ ci = self.cent_table[None, None, :].expand(B, N, -1)
+ return torch.exp(-torch.square(ci - cents) / 1250) * mask.float()
+
+
+class FCPEInfer:
+ def __init__(self, model_path, device=None, dtype=torch.float32):
+ if device is None:
+ device = "cuda" if torch.cuda.is_available() else "cpu"
+ self.device = device
+ ckpt = torch.load(model_path, map_location=torch.device(self.device))
+ self.args = DotDict(ckpt["config"])
+ self.dtype = dtype
+ model = FCPE(
+ input_channel=self.args.model.input_channel,
+ out_dims=self.args.model.out_dims,
+ n_layers=self.args.model.n_layers,
+ n_chans=self.args.model.n_chans,
+ use_siren=self.args.model.use_siren,
+ use_full=self.args.model.use_full,
+ loss_mse_scale=self.args.loss.loss_mse_scale,
+ loss_l2_regularization=self.args.loss.loss_l2_regularization,
+ loss_l2_regularization_scale=self.args.loss.loss_l2_regularization_scale,
+ loss_grad1_mse=self.args.loss.loss_grad1_mse,
+ loss_grad1_mse_scale=self.args.loss.loss_grad1_mse_scale,
+ f0_max=self.args.model.f0_max,
+ f0_min=self.args.model.f0_min,
+ confidence=self.args.model.confidence,
+ )
+ model.to(self.device).to(self.dtype)
+ model.load_state_dict(ckpt["model"])
+ model.eval()
+ self.model = model
+ self.wav2mel = Wav2Mel(self.args, dtype=self.dtype, device=self.device)
+
+ @torch.no_grad()
+ def __call__(self, audio, sr, threshold=0.05):
+ self.model.threshold = threshold
+ audio = audio[None, :]
+ mel = self.wav2mel(audio=audio, sample_rate=sr).to(self.dtype)
+ f0 = self.model(mel=mel, infer=True, return_hz_f0=True)
+ return f0
+
+
+class Wav2Mel:
+ def __init__(self, args, device=None, dtype=torch.float32):
+ self.sample_rate = args.mel.sampling_rate
+ self.hop_size = args.mel.hop_size
+ if device is None:
+ device = "cuda" if torch.cuda.is_available() else "cpu"
+ self.device = device
+ self.dtype = dtype
+ self.stft = STFT(
+ args.mel.sampling_rate,
+ args.mel.num_mels,
+ args.mel.n_fft,
+ args.mel.win_size,
+ args.mel.hop_size,
+ args.mel.fmin,
+ args.mel.fmax,
+ )
+ self.resample_kernel = {}
+
+ def extract_nvstft(self, audio, keyshift=0, train=False):
+ mel = self.stft.get_mel(audio, keyshift=keyshift, train=train).transpose(1, 2)
+ return mel
+
+ def extract_mel(self, audio, sample_rate, keyshift=0, train=False):
+ audio = audio.to(self.dtype).to(self.device)
+ if sample_rate == self.sample_rate:
+ audio_res = audio
+ else:
+ key_str = str(sample_rate)
+ if key_str not in self.resample_kernel:
+ self.resample_kernel[key_str] = Resample(
+ sample_rate, self.sample_rate, lowpass_filter_width=128
+ )
+ self.resample_kernel[key_str] = (
+ self.resample_kernel[key_str].to(self.dtype).to(self.device)
+ )
+ audio_res = self.resample_kernel[key_str](audio)
+
+ mel = self.extract_nvstft(
+ audio_res, keyshift=keyshift, train=train
+ ) # B, n_frames, bins
+ n_frames = int(audio.shape[1] // self.hop_size) + 1
+ mel = (
+ torch.cat((mel, mel[:, -1:, :]), 1) if n_frames > int(mel.shape[1]) else mel
+ )
+ mel = mel[:, :n_frames, :] if n_frames < int(mel.shape[1]) else mel
+ return mel
+
+ def __call__(self, audio, sample_rate, keyshift=0, train=False):
+ return self.extract_mel(audio, sample_rate, keyshift=keyshift, train=train)
+
+
+class DotDict(dict):
+    """Dictionary with attribute-style access; nested dicts are wrapped on read."""
+
+    def __getattr__(self, key):
+        val = self.get(key)
+        return DotDict(val) if type(val) is dict else val
+
+    __setattr__ = dict.__setitem__
+    __delattr__ = dict.__delitem__
+
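+# Usage note: DotDict gives attribute-style access to nested checkpoint configs,
+# e.g. DotDict({"model": {"out_dims": 360}}).model.out_dims == 360.
+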
+
+class F0Predictor(object):
+ def compute_f0(self, wav, p_len):
+ pass
+
+ def compute_f0_uv(self, wav, p_len):
+ pass
+
+
+class FCPEF0Predictor(F0Predictor):
+ def __init__(
+ self,
+ model_path,
+ hop_length=512,
+ f0_min=50,
+ f0_max=1100,
+ dtype=torch.float32,
+ device=None,
+ sample_rate=44100,
+ threshold=0.05,
+ ):
+ self.fcpe = FCPEInfer(model_path, device=device, dtype=dtype)
+ self.hop_length = hop_length
+ self.f0_min = f0_min
+ self.f0_max = f0_max
+ self.device = device or ("cuda" if torch.cuda.is_available() else "cpu")
+ self.threshold = threshold
+ self.sample_rate = sample_rate
+ self.dtype = dtype
+ self.name = "fcpe"
+
+ def repeat_expand(
+ self,
+ content: Union[torch.Tensor, np.ndarray],
+ target_len: int,
+ mode: str = "nearest",
+ ):
+ ndim = content.ndim
+ content = (
+ content[None, None]
+ if ndim == 1
+ else content[None] if ndim == 2 else content
+ )
+ assert content.ndim == 3
+ is_np = isinstance(content, np.ndarray)
+ content = torch.from_numpy(content) if is_np else content
+ results = torch.nn.functional.interpolate(content, size=target_len, mode=mode)
+ results = results.numpy() if is_np else results
+ return results[0, 0] if ndim == 1 else results[0] if ndim == 2 else results
+
+ def post_process(self, x, sample_rate, f0, pad_to):
+ f0 = (
+ torch.from_numpy(f0).float().to(x.device)
+ if isinstance(f0, np.ndarray)
+ else f0
+ )
+ f0 = self.repeat_expand(f0, pad_to) if pad_to is not None else f0
+
+ vuv_vector = torch.zeros_like(f0)
+ vuv_vector[f0 > 0.0] = 1.0
+ vuv_vector[f0 <= 0.0] = 0.0
+
+ nzindex = torch.nonzero(f0).squeeze()
+ f0 = torch.index_select(f0, dim=0, index=nzindex).cpu().numpy()
+ time_org = self.hop_length / sample_rate * nzindex.cpu().numpy()
+ time_frame = np.arange(pad_to) * self.hop_length / sample_rate
+
+ vuv_vector = F.interpolate(vuv_vector[None, None, :], size=pad_to)[0][0]
+
+ if f0.shape[0] <= 0:
+ return np.zeros(pad_to), vuv_vector.cpu().numpy()
+ if f0.shape[0] == 1:
+ return np.ones(pad_to) * f0[0], vuv_vector.cpu().numpy()
+
+ f0 = np.interp(time_frame, time_org, f0, left=f0[0], right=f0[-1])
+ return f0, vuv_vector.cpu().numpy()
+
+    def compute_f0(self, wav, p_len=None):
+        x = torch.FloatTensor(wav).to(self.dtype).to(self.device)
+        p_len = x.shape[0] // self.hop_length if p_len is None else p_len
+        f0 = self.fcpe(x, sr=self.sample_rate, threshold=self.threshold)[0, :, 0]
+        if torch.all(f0 == 0):
+            # No voiced frames detected: return an all-zero f0 track.
+            return np.zeros(p_len)
+        return self.post_process(x, self.sample_rate, f0, p_len)[0]
+
+    def compute_f0_uv(self, wav, p_len=None):
+        x = torch.FloatTensor(wav).to(self.dtype).to(self.device)
+        p_len = x.shape[0] // self.hop_length if p_len is None else p_len
+        f0 = self.fcpe(x, sr=self.sample_rate, threshold=self.threshold)[0, :, 0]
+        if torch.all(f0 == 0):
+            # No voiced frames detected: zero f0 plus an all-unvoiced mask.
+            return np.zeros(p_len), np.zeros(p_len)
+        return self.post_process(x, self.sample_rate, f0, p_len)
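+
+
+# A minimal usage sketch; the checkpoint path follows the predictors mapping in
+# prerequisites_download.py, and the noise input is only a stand-in for audio.
+if __name__ == "__main__":
+    dummy_wav = np.random.randn(16000).astype(np.float32)  # 1 s of 16 kHz "audio"
+    predictor = FCPEF0Predictor(
+        "rvc/models/predictors/fcpe.pt", device="cpu", sample_rate=16000
+    )
+    f0 = predictor.compute_f0(dummy_wav)  # frame-level f0 in Hz
+    print(f0.shape)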
diff --git a/rvc/lib/predictors/RMVPE.py b/rvc/lib/predictors/RMVPE.py
new file mode 100644
index 0000000000000000000000000000000000000000..7e9f6ddd4bb061164c2910e8c4216ef51acd3503
--- /dev/null
+++ b/rvc/lib/predictors/RMVPE.py
@@ -0,0 +1,569 @@
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+import numpy as np
+
+from librosa.filters import mel
+from typing import List
+
+# Constants for readability
+N_MELS = 128
+N_CLASS = 360
+
+
+# Define a convolutional block with a residual connection
+class ConvBlockRes(nn.Module):
+ """
+ A convolutional block with residual connection.
+
+ Args:
+ in_channels (int): Number of input channels.
+ out_channels (int): Number of output channels.
+ momentum (float): Momentum for batch normalization.
+ """
+
+ def __init__(self, in_channels, out_channels, momentum=0.01):
+ super(ConvBlockRes, self).__init__()
+ self.conv = nn.Sequential(
+ nn.Conv2d(
+ in_channels=in_channels,
+ out_channels=out_channels,
+ kernel_size=(3, 3),
+ stride=(1, 1),
+ padding=(1, 1),
+ bias=False,
+ ),
+ nn.BatchNorm2d(out_channels, momentum=momentum),
+ nn.ReLU(),
+ nn.Conv2d(
+ in_channels=out_channels,
+ out_channels=out_channels,
+ kernel_size=(3, 3),
+ stride=(1, 1),
+ padding=(1, 1),
+ bias=False,
+ ),
+ nn.BatchNorm2d(out_channels, momentum=momentum),
+ nn.ReLU(),
+ )
+ if in_channels != out_channels:
+ self.shortcut = nn.Conv2d(in_channels, out_channels, (1, 1))
+ self.is_shortcut = True
+ else:
+ self.is_shortcut = False
+
+ def forward(self, x):
+ if self.is_shortcut:
+ return self.conv(x) + self.shortcut(x)
+ else:
+ return self.conv(x) + x
+
+
+# Define a class for residual encoder blocks
+class ResEncoderBlock(nn.Module):
+ """
+ A residual encoder block.
+
+ Args:
+ in_channels (int): Number of input channels.
+ out_channels (int): Number of output channels.
+ kernel_size (tuple): Size of the average pooling kernel.
+ n_blocks (int): Number of convolutional blocks in the block.
+ momentum (float): Momentum for batch normalization.
+ """
+
+ def __init__(
+ self, in_channels, out_channels, kernel_size, n_blocks=1, momentum=0.01
+ ):
+ super(ResEncoderBlock, self).__init__()
+ self.n_blocks = n_blocks
+ self.conv = nn.ModuleList()
+ self.conv.append(ConvBlockRes(in_channels, out_channels, momentum))
+ for _ in range(n_blocks - 1):
+ self.conv.append(ConvBlockRes(out_channels, out_channels, momentum))
+ self.kernel_size = kernel_size
+ if self.kernel_size is not None:
+ self.pool = nn.AvgPool2d(kernel_size=kernel_size)
+
+ def forward(self, x):
+ for i in range(self.n_blocks):
+ x = self.conv[i](x)
+ if self.kernel_size is not None:
+ return x, self.pool(x)
+ else:
+ return x
+
+
+# Define a class for the encoder
+class Encoder(nn.Module):
+ """
+ The encoder part of the DeepUnet.
+
+ Args:
+ in_channels (int): Number of input channels.
+ in_size (int): Size of the input tensor.
+ n_encoders (int): Number of encoder blocks.
+ kernel_size (tuple): Size of the average pooling kernel.
+ n_blocks (int): Number of convolutional blocks in each encoder block.
+ out_channels (int): Number of output channels for the first encoder block.
+ momentum (float): Momentum for batch normalization.
+ """
+
+ def __init__(
+ self,
+ in_channels,
+ in_size,
+ n_encoders,
+ kernel_size,
+ n_blocks,
+ out_channels=16,
+ momentum=0.01,
+ ):
+ super(Encoder, self).__init__()
+ self.n_encoders = n_encoders
+ self.bn = nn.BatchNorm2d(in_channels, momentum=momentum)
+ self.layers = nn.ModuleList()
+ self.latent_channels = []
+ for i in range(self.n_encoders):
+ self.layers.append(
+ ResEncoderBlock(
+ in_channels, out_channels, kernel_size, n_blocks, momentum=momentum
+ )
+ )
+ self.latent_channels.append([out_channels, in_size])
+ in_channels = out_channels
+ out_channels *= 2
+ in_size //= 2
+ self.out_size = in_size
+ self.out_channel = out_channels
+
+ def forward(self, x: torch.Tensor):
+ concat_tensors: List[torch.Tensor] = []
+ x = self.bn(x)
+ for i in range(self.n_encoders):
+ t, x = self.layers[i](x)
+ concat_tensors.append(t)
+ return x, concat_tensors
+
+
+# Define a class for the intermediate layer
+class Intermediate(nn.Module):
+ """
+ The intermediate layer of the DeepUnet.
+
+ Args:
+ in_channels (int): Number of input channels.
+ out_channels (int): Number of output channels.
+ n_inters (int): Number of convolutional blocks in the intermediate layer.
+ n_blocks (int): Number of convolutional blocks in each intermediate block.
+ momentum (float): Momentum for batch normalization.
+ """
+
+ def __init__(self, in_channels, out_channels, n_inters, n_blocks, momentum=0.01):
+ super(Intermediate, self).__init__()
+ self.n_inters = n_inters
+ self.layers = nn.ModuleList()
+ self.layers.append(
+ ResEncoderBlock(in_channels, out_channels, None, n_blocks, momentum)
+ )
+ for _ in range(self.n_inters - 1):
+ self.layers.append(
+ ResEncoderBlock(out_channels, out_channels, None, n_blocks, momentum)
+ )
+
+ def forward(self, x):
+ for i in range(self.n_inters):
+ x = self.layers[i](x)
+ return x
+
+
+# Define a class for residual decoder blocks
+class ResDecoderBlock(nn.Module):
+ """
+ A residual decoder block.
+
+ Args:
+ in_channels (int): Number of input channels.
+ out_channels (int): Number of output channels.
+ stride (tuple): Stride for transposed convolution.
+ n_blocks (int): Number of convolutional blocks in the block.
+ momentum (float): Momentum for batch normalization.
+ """
+
+ def __init__(self, in_channels, out_channels, stride, n_blocks=1, momentum=0.01):
+ super(ResDecoderBlock, self).__init__()
+ out_padding = (0, 1) if stride == (1, 2) else (1, 1)
+ self.n_blocks = n_blocks
+ self.conv1 = nn.Sequential(
+ nn.ConvTranspose2d(
+ in_channels=in_channels,
+ out_channels=out_channels,
+ kernel_size=(3, 3),
+ stride=stride,
+ padding=(1, 1),
+ output_padding=out_padding,
+ bias=False,
+ ),
+ nn.BatchNorm2d(out_channels, momentum=momentum),
+ nn.ReLU(),
+ )
+ self.conv2 = nn.ModuleList()
+ self.conv2.append(ConvBlockRes(out_channels * 2, out_channels, momentum))
+ for _ in range(n_blocks - 1):
+ self.conv2.append(ConvBlockRes(out_channels, out_channels, momentum))
+
+ def forward(self, x, concat_tensor):
+ x = self.conv1(x)
+ x = torch.cat((x, concat_tensor), dim=1)
+ for i in range(self.n_blocks):
+ x = self.conv2[i](x)
+ return x
+
+
+# Define a class for the decoder
+class Decoder(nn.Module):
+ """
+ The decoder part of the DeepUnet.
+
+ Args:
+ in_channels (int): Number of input channels.
+ n_decoders (int): Number of decoder blocks.
+ stride (tuple): Stride for transposed convolution.
+ n_blocks (int): Number of convolutional blocks in each decoder block.
+ momentum (float): Momentum for batch normalization.
+ """
+
+ def __init__(self, in_channels, n_decoders, stride, n_blocks, momentum=0.01):
+ super(Decoder, self).__init__()
+ self.layers = nn.ModuleList()
+ self.n_decoders = n_decoders
+ for _ in range(self.n_decoders):
+ out_channels = in_channels // 2
+ self.layers.append(
+ ResDecoderBlock(in_channels, out_channels, stride, n_blocks, momentum)
+ )
+ in_channels = out_channels
+
+ def forward(self, x, concat_tensors):
+ for i in range(self.n_decoders):
+ x = self.layers[i](x, concat_tensors[-1 - i])
+ return x
+
+
+# Define a class for the DeepUnet architecture
+class DeepUnet(nn.Module):
+ """
+ The DeepUnet architecture.
+
+ Args:
+ kernel_size (tuple): Size of the average pooling kernel.
+ n_blocks (int): Number of convolutional blocks in each encoder/decoder block.
+ en_de_layers (int): Number of encoder/decoder layers.
+ inter_layers (int): Number of convolutional blocks in the intermediate layer.
+ in_channels (int): Number of input channels.
+ en_out_channels (int): Number of output channels for the first encoder block.
+ """
+
+ def __init__(
+ self,
+ kernel_size,
+ n_blocks,
+ en_de_layers=5,
+ inter_layers=4,
+ in_channels=1,
+ en_out_channels=16,
+ ):
+ super(DeepUnet, self).__init__()
+ self.encoder = Encoder(
+ in_channels, 128, en_de_layers, kernel_size, n_blocks, en_out_channels
+ )
+ self.intermediate = Intermediate(
+ self.encoder.out_channel // 2,
+ self.encoder.out_channel,
+ inter_layers,
+ n_blocks,
+ )
+ self.decoder = Decoder(
+ self.encoder.out_channel, en_de_layers, kernel_size, n_blocks
+ )
+
+ def forward(self, x):
+ x, concat_tensors = self.encoder(x)
+ x = self.intermediate(x)
+ x = self.decoder(x, concat_tensors)
+ return x
+
+
+# Define a class for the end-to-end model
+class E2E(nn.Module):
+ """
+ The end-to-end model.
+
+ Args:
+ n_blocks (int): Number of convolutional blocks in each encoder/decoder block.
+ n_gru (int): Number of GRU layers.
+ kernel_size (tuple): Size of the average pooling kernel.
+ en_de_layers (int): Number of encoder/decoder layers.
+ inter_layers (int): Number of convolutional blocks in the intermediate layer.
+ in_channels (int): Number of input channels.
+ en_out_channels (int): Number of output channels for the first encoder block.
+ """
+
+ def __init__(
+ self,
+ n_blocks,
+ n_gru,
+ kernel_size,
+ en_de_layers=5,
+ inter_layers=4,
+ in_channels=1,
+ en_out_channels=16,
+ ):
+ super(E2E, self).__init__()
+ self.unet = DeepUnet(
+ kernel_size,
+ n_blocks,
+ en_de_layers,
+ inter_layers,
+ in_channels,
+ en_out_channels,
+ )
+ self.cnn = nn.Conv2d(en_out_channels, 3, (3, 3), padding=(1, 1))
+ if n_gru:
+ self.fc = nn.Sequential(
+ BiGRU(3 * 128, 256, n_gru),
+ nn.Linear(512, N_CLASS),
+ nn.Dropout(0.25),
+ nn.Sigmoid(),
+ )
+ else:
+ self.fc = nn.Sequential(
+ nn.Linear(3 * N_MELS, N_CLASS), nn.Dropout(0.25), nn.Sigmoid()
+ )
+
+ def forward(self, mel):
+ mel = mel.transpose(-1, -2).unsqueeze(1)
+ x = self.cnn(self.unet(mel)).transpose(1, 2).flatten(-2)
+ x = self.fc(x)
+ return x
+
+
+# Define a class for the MelSpectrogram extractor
+class MelSpectrogram(torch.nn.Module):
+ """
+ Extracts Mel-spectrogram features from audio.
+
+ Args:
+ is_half (bool): Whether to use half-precision floating-point numbers.
+ n_mel_channels (int): Number of Mel-frequency bands.
+ sample_rate (int): Sampling rate of the audio.
+ win_length (int): Length of the window function in samples.
+ hop_length (int): Hop size between frames in samples.
+ n_fft (int, optional): Length of the FFT window. Defaults to None, which uses win_length.
+ mel_fmin (int, optional): Minimum frequency for the Mel filter bank. Defaults to 0.
+ mel_fmax (int, optional): Maximum frequency for the Mel filter bank. Defaults to None.
+ clamp (float, optional): Minimum value for clamping the Mel-spectrogram. Defaults to 1e-5.
+ """
+
+ def __init__(
+ self,
+ is_half,
+ n_mel_channels,
+ sample_rate,
+ win_length,
+ hop_length,
+ n_fft=None,
+ mel_fmin=0,
+ mel_fmax=None,
+ clamp=1e-5,
+ ):
+ super().__init__()
+ n_fft = win_length if n_fft is None else n_fft
+ self.hann_window = {}
+ mel_basis = mel(
+ sr=sample_rate,
+ n_fft=n_fft,
+ n_mels=n_mel_channels,
+ fmin=mel_fmin,
+ fmax=mel_fmax,
+ htk=True,
+ )
+ mel_basis = torch.from_numpy(mel_basis).float()
+ self.register_buffer("mel_basis", mel_basis)
+        self.n_fft = n_fft
+ self.hop_length = hop_length
+ self.win_length = win_length
+ self.sample_rate = sample_rate
+ self.n_mel_channels = n_mel_channels
+ self.clamp = clamp
+ self.is_half = is_half
+
+ def forward(self, audio, keyshift=0, speed=1, center=True):
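+        # One semitone is a factor of 2^(1/12), so scaling the FFT and window
+        # sizes by 2^(keyshift/12) realizes the key shift; the magnitude
+        # spectrum is cropped/rescaled back to the base n_fft below.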
+ factor = 2 ** (keyshift / 12)
+ n_fft_new = int(np.round(self.n_fft * factor))
+ win_length_new = int(np.round(self.win_length * factor))
+ hop_length_new = int(np.round(self.hop_length * speed))
+ keyshift_key = str(keyshift) + "_" + str(audio.device)
+ if keyshift_key not in self.hann_window:
+ self.hann_window[keyshift_key] = torch.hann_window(win_length_new).to(
+ audio.device
+ )
+
+        # ZLUDA fallback: run the FFT on the CPU, since the HIP SDK has no cuFFT alternative
+ source_device = audio.device
+ if audio.device.type == "cuda" and torch.cuda.get_device_name().endswith(
+ "[ZLUDA]"
+ ):
+ audio = audio.to("cpu")
+ self.hann_window[keyshift_key] = self.hann_window[keyshift_key].to("cpu")
+
+ fft = torch.stft(
+ audio,
+ n_fft=n_fft_new,
+ hop_length=hop_length_new,
+ win_length=win_length_new,
+ window=self.hann_window[keyshift_key],
+ center=center,
+ return_complex=True,
+ ).to(source_device)
+
+ magnitude = torch.sqrt(fft.real.pow(2) + fft.imag.pow(2))
+ if keyshift != 0:
+ size = self.n_fft // 2 + 1
+ resize = magnitude.size(1)
+ if resize < size:
+ magnitude = F.pad(magnitude, (0, 0, 0, size - resize))
+ magnitude = magnitude[:, :size, :] * self.win_length / win_length_new
+ mel_output = torch.matmul(self.mel_basis, magnitude)
+ if self.is_half:
+ mel_output = mel_output.half()
+ log_mel_spec = torch.log(torch.clamp(mel_output, min=self.clamp))
+ return log_mel_spec
+
+
+# Define a class for the RMVPE0 predictor
+class RMVPE0Predictor:
+ """
+ A predictor for fundamental frequency (F0) based on the RMVPE0 model.
+
+ Args:
+ model_path (str): Path to the RMVPE0 model file.
+ is_half (bool): Whether to use half-precision floating-point numbers.
+ device (str, optional): Device to use for computation. Defaults to None, which uses CUDA if available.
+ """
+
+ def __init__(self, model_path, is_half, device=None):
+ self.resample_kernel = {}
+ model = E2E(4, 1, (2, 2))
+ ckpt = torch.load(model_path, map_location="cpu")
+ model.load_state_dict(ckpt)
+ model.eval()
+ if is_half:
+ model = model.half()
+ self.model = model
+ self.resample_kernel = {}
+ self.is_half = is_half
+ self.device = device
+ self.mel_extractor = MelSpectrogram(
+ is_half, N_MELS, 16000, 1024, 160, None, 30, 8000
+ ).to(device)
+ self.model = self.model.to(device)
+ cents_mapping = 20 * np.arange(N_CLASS) + 1997.3794084376191
+ self.cents_mapping = np.pad(cents_mapping, (4, 4))
+
+ def mel2hidden(self, mel):
+ """
+ Converts Mel-spectrogram features to hidden representation.
+
+ Args:
+ mel (torch.Tensor): Mel-spectrogram features.
+ """
+ with torch.no_grad():
+ n_frames = mel.shape[-1]
+ mel = F.pad(
+ mel, (0, 32 * ((n_frames - 1) // 32 + 1) - n_frames), mode="reflect"
+ )
+ hidden = self.model(mel)
+ return hidden[:, :n_frames]
+
+ def decode(self, hidden, thred=0.03):
+ """
+ Decodes hidden representation to F0.
+
+ Args:
+ hidden (np.ndarray): Hidden representation.
+ thred (float, optional): Threshold for salience. Defaults to 0.03.
+ """
+ cents_pred = self.to_local_average_cents(hidden, thred=thred)
+ f0 = 10 * (2 ** (cents_pred / 1200))
+ f0[f0 == 10] = 0
+ return f0
+
+ def infer_from_audio(self, audio, thred=0.03):
+ """
+ Infers F0 from audio.
+
+ Args:
+ audio (np.ndarray): Audio signal.
+ thred (float, optional): Threshold for salience. Defaults to 0.03.
+ """
+ audio = torch.from_numpy(audio).float().to(self.device).unsqueeze(0)
+ mel = self.mel_extractor(audio, center=True)
+ hidden = self.mel2hidden(mel)
+ hidden = hidden.squeeze(0).cpu().numpy()
+        if self.is_half:
+ hidden = hidden.astype("float32")
+ f0 = self.decode(hidden, thred=thred)
+ return f0
+
+ def to_local_average_cents(self, salience, thred=0.05):
+ """
+ Converts salience to local average cents.
+
+ Args:
+ salience (np.ndarray): Salience values.
+ thred (float, optional): Threshold for salience. Defaults to 0.05.
+ """
+ center = np.argmax(salience, axis=1)
+ salience = np.pad(salience, ((0, 0), (4, 4)))
+ center += 4
+ todo_salience = []
+ todo_cents_mapping = []
+ starts = center - 4
+ ends = center + 5
+ for idx in range(salience.shape[0]):
+ todo_salience.append(salience[:, starts[idx] : ends[idx]][idx])
+ todo_cents_mapping.append(self.cents_mapping[starts[idx] : ends[idx]])
+ todo_salience = np.array(todo_salience)
+ todo_cents_mapping = np.array(todo_cents_mapping)
+ product_sum = np.sum(todo_salience * todo_cents_mapping, 1)
+ weight_sum = np.sum(todo_salience, 1)
+        divided = product_sum / weight_sum
+        maxx = np.max(salience, axis=1)
+        divided[maxx <= thred] = 0
+        return divided
+
+
+# Define a class for BiGRU (bidirectional GRU)
+class BiGRU(nn.Module):
+ """
+ A bidirectional GRU layer.
+
+ Args:
+ input_features (int): Number of input features.
+ hidden_features (int): Number of hidden features.
+ num_layers (int): Number of GRU layers.
+ """
+
+ def __init__(self, input_features, hidden_features, num_layers):
+ super(BiGRU, self).__init__()
+ self.gru = nn.GRU(
+ input_features,
+ hidden_features,
+ num_layers=num_layers,
+ batch_first=True,
+ bidirectional=True,
+ )
+
+ def forward(self, x):
+ return self.gru(x)[0]
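+
+
+# A minimal usage sketch; the checkpoint path follows the predictors mapping in
+# prerequisites_download.py, and RMVPE expects 16 kHz mono float audio.
+if __name__ == "__main__":
+    audio = np.random.randn(16000).astype(np.float32)  # 1 s of noise at 16 kHz
+    predictor = RMVPE0Predictor(
+        "rvc/models/predictors/rmvpe.pt", is_half=False, device="cpu"
+    )
+    f0 = predictor.infer_from_audio(audio, thred=0.03)  # frame-level f0 in Hz
+    print(f0.shape)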
diff --git a/rvc/lib/tools/analyzer.py b/rvc/lib/tools/analyzer.py
new file mode 100644
index 0000000000000000000000000000000000000000..f4b794348082b168132dda0a23933c6d633f0097
--- /dev/null
+++ b/rvc/lib/tools/analyzer.py
@@ -0,0 +1,76 @@
+import numpy as np
+import matplotlib.pyplot as plt
+import librosa.display
+import librosa
+
+
+def calculate_features(y, sr):
+ stft = np.abs(librosa.stft(y))
+ duration = librosa.get_duration(y=y, sr=sr)
+ cent = librosa.feature.spectral_centroid(S=stft, sr=sr)[0]
+ bw = librosa.feature.spectral_bandwidth(S=stft, sr=sr)[0]
+ rolloff = librosa.feature.spectral_rolloff(S=stft, sr=sr)[0]
+ return stft, duration, cent, bw, rolloff
+
+
+def plot_title(title):
+ plt.suptitle(title, fontsize=16, fontweight="bold")
+
+
+def plot_spectrogram(y, sr, stft, duration, cmap="inferno"):
+ plt.subplot(3, 1, 1)
+ plt.imshow(
+ librosa.amplitude_to_db(stft, ref=np.max),
+ origin="lower",
+ extent=[0, duration, 0, sr / 1000],
+ aspect="auto",
+ cmap=cmap, # Change the colormap here
+ )
+ plt.colorbar(format="%+2.0f dB")
+ plt.xlabel("Time (s)")
+ plt.ylabel("Frequency (kHz)")
+ plt.title("Spectrogram")
+
+
+def plot_waveform(y, sr, duration):
+ plt.subplot(3, 1, 2)
+ librosa.display.waveshow(y, sr=sr)
+ plt.xlabel("Time (s)")
+ plt.ylabel("Amplitude")
+ plt.title("Waveform")
+
+
+def plot_features(times, cent, bw, rolloff, duration):
+ plt.subplot(3, 1, 3)
+ plt.plot(times, cent, label="Spectral Centroid (kHz)", color="b")
+ plt.plot(times, bw, label="Spectral Bandwidth (kHz)", color="g")
+ plt.plot(times, rolloff, label="Spectral Rolloff (kHz)", color="r")
+ plt.xlabel("Time (s)")
+ plt.title("Spectral Features")
+ plt.legend()
+
+
+def analyze_audio(audio_file, save_plot_path="logs/audio_analysis.png"):
+ y, sr = librosa.load(audio_file)
+ stft, duration, cent, bw, rolloff = calculate_features(y, sr)
+
+ plt.figure(figsize=(12, 10))
+
+ plot_title("Audio Analysis" + " - " + audio_file.split("/")[-1])
+ plot_spectrogram(y, sr, stft, duration)
+ plot_waveform(y, sr, duration)
+ plot_features(librosa.times_like(cent), cent, bw, rolloff, duration)
+
+ plt.tight_layout()
+
+ if save_plot_path:
+ plt.savefig(save_plot_path, bbox_inches="tight", dpi=300)
+ plt.close()
+
+    duration_str = (
+        f"{round(duration, 2)} seconds"
+        if duration < 60
+        else f"{round(duration / 60, 2)} minutes"
+    )
+    audio_info = (
+        f"Sample Rate: {sr}\n"
+        f"Duration: {duration_str}\n"
+        f"Number of Samples: {len(y)}\n"
+        f"Native Sample Rate: {librosa.get_samplerate(audio_file)}\n"
+        f"Channels: {'Mono (1)' if y.ndim == 1 else 'Stereo (2)'}"
+    )
+
+ return audio_info, save_plot_path
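+
+
+# A minimal usage sketch; "sample.wav" is a placeholder for any audio file.
+if __name__ == "__main__":
+    info, plot_path = analyze_audio("sample.wav")
+    print(info)
+    print(f"Analysis plot saved to: {plot_path}")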
diff --git a/rvc/lib/tools/gdown.py b/rvc/lib/tools/gdown.py
new file mode 100644
index 0000000000000000000000000000000000000000..eb5ca071e52d3d48c58708ee2fbbeefb205827d3
--- /dev/null
+++ b/rvc/lib/tools/gdown.py
@@ -0,0 +1,354 @@
+import os
+import re
+import six
+import sys
+import json
+import tqdm
+import time
+import shutil
+import warnings
+import tempfile
+import textwrap
+import requests
+from six.moves import urllib_parse
+
+
+def indent(text, prefix):
+ """Indent each non-empty line of text with the given prefix."""
+ return "".join(
+ (prefix + line if line.strip() else line) for line in text.splitlines(True)
+ )
+
+
+class FileURLRetrievalError(Exception):
+ pass
+
+
+class FolderContentsMaximumLimitError(Exception):
+ pass
+
+
+def parse_url(url, warning=True):
+ """Parse URLs especially for Google Drive links.
+
+ Args:
+ url: URL to parse.
+ warning: Whether to warn if the URL is not a download link.
+
+ Returns:
+ A tuple (file_id, is_download_link), where file_id is the ID of the
+ file on Google Drive, and is_download_link is a flag indicating
+ whether the URL is a download link.
+ """
+ parsed = urllib_parse.urlparse(url)
+ query = urllib_parse.parse_qs(parsed.query)
+ is_gdrive = parsed.hostname in ("drive.google.com", "docs.google.com")
+ is_download_link = parsed.path.endswith("/uc")
+
+ if not is_gdrive:
+ return None, is_download_link
+
+ file_id = query.get("id", [None])[0]
+ if file_id is None:
+ for pattern in (
+ r"^/file/d/(.*?)/(edit|view)$",
+ r"^/file/u/[0-9]+/d/(.*?)/(edit|view)$",
+ r"^/document/d/(.*?)/(edit|htmlview|view)$",
+ r"^/document/u/[0-9]+/d/(.*?)/(edit|htmlview|view)$",
+ r"^/presentation/d/(.*?)/(edit|htmlview|view)$",
+ r"^/presentation/u/[0-9]+/d/(.*?)/(edit|htmlview|view)$",
+ r"^/spreadsheets/d/(.*?)/(edit|htmlview|view)$",
+ r"^/spreadsheets/u/[0-9]+/d/(.*?)/(edit|htmlview|view)$",
+ ):
+ match = re.match(pattern, parsed.path)
+ if match:
+ file_id = match.group(1)
+ break
+
+ if warning and not is_download_link:
+ warnings.warn(
+ "You specified a Google Drive link that is not the correct link "
+ "to download a file. You might want to try `--fuzzy` option "
+ f"or the following url: https://drive.google.com/uc?id={file_id}"
+ )
+
+ return file_id, is_download_link
+
+
+CHUNK_SIZE = 512 * 1024 # 512KB
+HOME = os.path.expanduser("~")
+
+
+def get_url_from_gdrive_confirmation(contents):
+ """Extract the download URL from a Google Drive confirmation page."""
+ for pattern in (
+ r'href="(\/uc\?export=download[^"]+)',
+ r'href="/open\?id=([^"]+)"',
+ r'"downloadUrl":"([^"]+)',
+ ):
+        match = re.search(pattern, contents)
+        if match:
+            url = match.group(1)
+            if pattern == r'href="/open\?id=([^"]+)"':
+                uuid = re.search(
+                    r'<input\s+type="hidden"\s+name="uuid"\s+value="([^"]+)"',
+                    contents,
+                ).group(1)
+                url = (
+                    "https://drive.usercontent.google.com/download?id="
+                    + url
+                    + "&confirm=t&uuid="
+                    + uuid
+                )
+            elif pattern == r'"downloadUrl":"([^"]+)':
+                url = url.replace("\\u003d", "=").replace("\\u0026", "&")
+            else:
+                url = "https://docs.google.com" + url.replace("&amp;", "&")
+            return url
+
+    match = re.search(r'<p class="uc-error-subcaption">(.*)</p>', contents)
+    if match:
+        error = match.group(1)
+        raise FileURLRetrievalError(error)
+
+    raise FileURLRetrievalError(
+        "Cannot retrieve the public link of the file. "
+        "You may need to change the permission to "
+        "'Anyone with the link', or have had many accesses."
+    )
+
+
+def _get_session(proxy, use_cookies, return_cookies_file=False):
+ """Create a requests session with optional proxy and cookie handling."""
+ sess = requests.session()
+ sess.headers.update(
+ {"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6)"}
+ )
+
+ if proxy is not None:
+ sess.proxies = {"http": proxy, "https": proxy}
+ print("Using proxy:", proxy, file=sys.stderr)
+
+ cookies_file = os.path.join(HOME, ".cache/gdown/cookies.json")
+ if os.path.exists(cookies_file) and use_cookies:
+ with open(cookies_file) as f:
+ cookies = json.load(f)
+ for k, v in cookies:
+ sess.cookies[k] = v
+
+ return (sess, cookies_file) if return_cookies_file else sess
+
+
+def download(
+ url=None,
+ output=None,
+ quiet=False,
+ proxy=None,
+ speed=None,
+ use_cookies=True,
+ verify=True,
+ id=None,
+ fuzzy=True,
+ resume=False,
+ format=None,
+):
+ """Download file from URL.
+
+ Parameters
+ ----------
+ url: str
+ URL. Google Drive URL is also supported.
+ output: str
+ Output filename. Default is basename of URL.
+ quiet: bool
+ Suppress terminal output. Default is False.
+ proxy: str
+ Proxy.
+ speed: float
+ Download byte size per second (e.g., 256KB/s = 256 * 1024).
+ use_cookies: bool
+ Flag to use cookies. Default is True.
+ verify: bool or string
+ Either a bool, in which case it controls whether the server's TLS
+ certificate is verified, or a string, in which case it must be a path
+ to a CA bundle to use. Default is True.
+ id: str
+ Google Drive's file ID.
+    fuzzy: bool
+        Fuzzy extraction of Google Drive's file id. Default is True.
+ resume: bool
+ Resume the download from existing tmp file if possible.
+ Default is False.
+ format: str, optional
+ Format of Google Docs, Spreadsheets and Slides. Default is:
+ - Google Docs: 'docx'
+ - Google Spreadsheet: 'xlsx'
+ - Google Slides: 'pptx'
+
+ Returns
+ -------
+ output: str
+ Output filename.
+ """
+ if not (id is None) ^ (url is None):
+ raise ValueError("Either url or id has to be specified")
+ if id is not None:
+ url = f"https://drive.google.com/uc?id={id}"
+
+ url_origin = url
+
+ sess, cookies_file = _get_session(
+ proxy=proxy, use_cookies=use_cookies, return_cookies_file=True
+ )
+
+ gdrive_file_id, is_gdrive_download_link = parse_url(url, warning=not fuzzy)
+
+ if fuzzy and gdrive_file_id:
+ # overwrite the url with fuzzy match of a file id
+ url = f"https://drive.google.com/uc?id={gdrive_file_id}"
+ url_origin = url
+ is_gdrive_download_link = True
+
+ while True:
+ res = sess.get(url, stream=True, verify=verify)
+
+ if url == url_origin and res.status_code == 500:
+ # The file could be Google Docs or Spreadsheets.
+ url = f"https://drive.google.com/open?id={gdrive_file_id}"
+ continue
+
+ if res.headers["Content-Type"].startswith("text/html"):
+            title = re.search(r"<title>(.+)</title>", res.text)
+ if title:
+ title = title.group(1)
+ if title.endswith(" - Google Docs"):
+ url = f"https://docs.google.com/document/d/{gdrive_file_id}/export?format={'docx' if format is None else format}"
+ continue
+ if title.endswith(" - Google Sheets"):
+ url = f"https://docs.google.com/spreadsheets/d/{gdrive_file_id}/export?format={'xlsx' if format is None else format}"
+ continue
+ if title.endswith(" - Google Slides"):
+ url = f"https://docs.google.com/presentation/d/{gdrive_file_id}/export?format={'pptx' if format is None else format}"
+ continue
+ elif (
+ "Content-Disposition" in res.headers
+ and res.headers["Content-Disposition"].endswith("pptx")
+ and format not in (None, "pptx")
+ ):
+ url = f"https://docs.google.com/presentation/d/{gdrive_file_id}/export?format={'pptx' if format is None else format}"
+ continue
+
+ if use_cookies:
+ os.makedirs(os.path.dirname(cookies_file), exist_ok=True)
+ with open(cookies_file, "w") as f:
+ cookies = [
+ (k, v)
+ for k, v in sess.cookies.items()
+ if not k.startswith("download_warning_")
+ ]
+ json.dump(cookies, f, indent=2)
+
+ if "Content-Disposition" in res.headers:
+ # This is the file
+ break
+ if not (gdrive_file_id and is_gdrive_download_link):
+ break
+
+ # Need to redirect with confirmation
+ try:
+ url = get_url_from_gdrive_confirmation(res.text)
+ except FileURLRetrievalError as e:
+ message = (
+ "Failed to retrieve file url:\n\n"
+ "{}\n\n"
+ "You may still be able to access the file from the browser:"
+ f"\n\n\t{url_origin}\n\n"
+ "but Gdown can't. Please check connections and permissions."
+ ).format(indent("\n".join(textwrap.wrap(str(e))), prefix="\t"))
+ raise FileURLRetrievalError(message)
+
+ if gdrive_file_id and is_gdrive_download_link:
+ content_disposition = urllib_parse.unquote(res.headers["Content-Disposition"])
+ filename_from_url = (
+ re.search(r"filename\*=UTF-8''(.*)", content_disposition)
+ or re.search(r'filename=["\']?(.*?)["\']?$', content_disposition)
+ ).group(1)
+ filename_from_url = filename_from_url.replace(os.path.sep, "_")
+ else:
+ filename_from_url = os.path.basename(url)
+
+ output = output or filename_from_url
+
+ output_is_path = isinstance(output, six.string_types)
+ if output_is_path and output.endswith(os.path.sep):
+ os.makedirs(output, exist_ok=True)
+ output = os.path.join(output, filename_from_url)
+
+ if output_is_path:
+ temp_dir = os.path.dirname(output) or "."
+ prefix = os.path.basename(output)
+ existing_tmp_files = [
+ os.path.join(temp_dir, file)
+ for file in os.listdir(temp_dir)
+ if file.startswith(prefix)
+ ]
+ if resume and existing_tmp_files:
+ if len(existing_tmp_files) > 1:
+ print(
+ "There are multiple temporary files to resume:",
+ file=sys.stderr,
+ )
+ for file in existing_tmp_files:
+ print(f"\t{file}", file=sys.stderr)
+ print(
+ "Please remove them except one to resume downloading.",
+ file=sys.stderr,
+ )
+ return
+ tmp_file = existing_tmp_files[0]
+ else:
+ resume = False
+ tmp_file = tempfile.mktemp(
+ suffix=tempfile.template, prefix=prefix, dir=temp_dir
+ )
+ f = open(tmp_file, "ab")
+ else:
+ tmp_file = None
+ f = output
+
+ if tmp_file is not None and f.tell() != 0:
+ headers = {"Range": f"bytes={f.tell()}-"}
+ res = sess.get(url, headers=headers, stream=True, verify=verify)
+
+ if not quiet:
+ if resume:
+ print("Resume:", tmp_file, file=sys.stderr)
+ print(
+ "To:",
+ os.path.abspath(output) if output_is_path else output,
+ file=sys.stderr,
+ )
+
+ try:
+ total = int(res.headers.get("Content-Length", 0))
+ if not quiet:
+ pbar = tqdm.tqdm(total=total, unit="B", unit_scale=True)
+ t_start = time.time()
+ for chunk in res.iter_content(chunk_size=CHUNK_SIZE):
+ f.write(chunk)
+ if not quiet:
+ pbar.update(len(chunk))
+ if speed is not None:
+ elapsed_time_expected = 1.0 * pbar.n / speed
+ elapsed_time = time.time() - t_start
+ if elapsed_time < elapsed_time_expected:
+ time.sleep(elapsed_time_expected - elapsed_time)
+ if not quiet:
+ pbar.close()
+ if tmp_file:
+ f.close()
+ shutil.move(tmp_file, output)
+ finally:
+ sess.close()
+
+ return output
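+
+
+# A minimal usage sketch; the id below is a placeholder, not a real file.
+if __name__ == "__main__":
+    saved = download(id="YOUR_FILE_ID", output="model.zip", quiet=False)
+    print(f"Saved to {saved}")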
diff --git a/rvc/lib/tools/launch_tensorboard.py b/rvc/lib/tools/launch_tensorboard.py
new file mode 100644
index 0000000000000000000000000000000000000000..7f74e316762b737037f7b8e4448a1042553d5651
--- /dev/null
+++ b/rvc/lib/tools/launch_tensorboard.py
@@ -0,0 +1,21 @@
+import time
+import logging
+from tensorboard import program
+
+log_path = "logs"
+
+
+def launch_tensorboard_pipeline():
+ logging.getLogger("root").setLevel(logging.WARNING)
+ logging.getLogger("tensorboard").setLevel(logging.WARNING)
+
+ tb = program.TensorBoard()
+ tb.configure(argv=[None, "--logdir", log_path])
+ url = tb.launch()
+
+ print(
+ f"Access the tensorboard using the following link:\n{url}?pinnedCards=%5B%7B%22plugin%22%3A%22scalars%22%2C%22tag%22%3A%22loss%2Fg%2Ftotal%22%7D%2C%7B%22plugin%22%3A%22scalars%22%2C%22tag%22%3A%22loss%2Fd%2Ftotal%22%7D%2C%7B%22plugin%22%3A%22scalars%22%2C%22tag%22%3A%22loss%2Fg%2Fkl%22%7D%2C%7B%22plugin%22%3A%22scalars%22%2C%22tag%22%3A%22loss%2Fg%2Fmel%22%7D%5D"
+ )
+
+ while True:
+ time.sleep(600)
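+
+
+# Note: launch_tensorboard_pipeline() never returns; the sleep loop keeps the
+# TensorBoard server alive, so run it in a dedicated process or thread.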
diff --git a/rvc/lib/tools/model_download.py b/rvc/lib/tools/model_download.py
new file mode 100644
index 0000000000000000000000000000000000000000..ab1b136e265994ea879e501bab1c93eebff6f468
--- /dev/null
+++ b/rvc/lib/tools/model_download.py
@@ -0,0 +1,385 @@
+import os
+import re
+import six
+import sys
+import wget
+import shutil
+import zipfile
+import requests
+from bs4 import BeautifulSoup
+from urllib.parse import unquote, urlencode, parse_qs, urlparse
+
+now_dir = os.getcwd()
+sys.path.append(now_dir)
+
+from rvc.lib.utils import format_title
+from rvc.lib.tools import gdown
+
+
+def find_folder_parent(search_dir, folder_name):
+ for dirpath, dirnames, _ in os.walk(search_dir):
+ if folder_name in dirnames:
+ return os.path.abspath(dirpath)
+ return None
+
+
+file_path = find_folder_parent(now_dir, "logs")
+zips_path = os.path.join(file_path, "zips")
+
+
+def search_pth_index(folder):
+ pth_paths = [
+ os.path.join(folder, file)
+ for file in os.listdir(folder)
+ if os.path.isfile(os.path.join(folder, file)) and file.endswith(".pth")
+ ]
+ index_paths = [
+ os.path.join(folder, file)
+ for file in os.listdir(folder)
+ if os.path.isfile(os.path.join(folder, file)) and file.endswith(".index")
+ ]
+
+ return pth_paths, index_paths
+
+
+def get_mediafire_download_link(url):
+ response = requests.get(url)
+ response.raise_for_status()
+ soup = BeautifulSoup(response.text, "html.parser")
+ download_button = soup.find(
+ "a", {"class": "input popsok", "aria-label": "Download file"}
+ )
+ if download_button:
+ download_link = download_button.get("href")
+ return download_link
+ else:
+ return None
+
+
+def download_from_url(url):
+ os.makedirs(zips_path, exist_ok=True)
+ if url != "":
+ if "drive.google.com" in url:
+ if "file/d/" in url:
+ file_id = url.split("file/d/")[1].split("/")[0]
+ elif "id=" in url:
+ file_id = url.split("id=")[1].split("&")[0]
+ else:
+ return None
+
+ if file_id:
+ os.chdir(zips_path)
+ try:
+ gdown.download(
+ f"https://drive.google.com/uc?id={file_id}",
+ quiet=True,
+ fuzzy=True,
+ )
+ except Exception as error:
+ error_message = str(
+ f"An error occurred downloading the file: {error}"
+ )
+ if (
+ "Too many users have viewed or downloaded this file recently"
+ in error_message
+ ):
+ os.chdir(now_dir)
+ return "too much use"
+ elif (
+ "Cannot retrieve the public link of the file." in error_message
+ ):
+ os.chdir(now_dir)
+ return "private link"
+ else:
+ print(error_message)
+ os.chdir(now_dir)
+ return None
+ elif "disk.yandex.ru" in url:
+ base_url = "https://cloud-api.yandex.net/v1/disk/public/resources/download?"
+ public_key = url
+ final_url = base_url + urlencode(dict(public_key=public_key))
+ response = requests.get(final_url)
+ download_url = response.json()["href"]
+ download_response = requests.get(download_url)
+
+ if download_response.status_code == 200:
+ filename = parse_qs(urlparse(unquote(download_url)).query).get(
+ "filename", [""]
+ )[0]
+ if filename:
+ os.chdir(zips_path)
+ with open(filename, "wb") as f:
+ f.write(download_response.content)
+ else:
+ print("Failed to get filename from URL.")
+ return None
+
+ elif "pixeldrain.com" in url:
+ try:
+ file_id = url.split("pixeldrain.com/u/")[1]
+ os.chdir(zips_path)
+ print(file_id)
+ response = requests.get(f"https://pixeldrain.com/api/file/{file_id}")
+ if response.status_code == 200:
+ file_name = (
+ response.headers.get("Content-Disposition")
+ .split("filename=")[-1]
+ .strip('";')
+ )
+ os.makedirs(zips_path, exist_ok=True)
+ with open(os.path.join(zips_path, file_name), "wb") as newfile:
+ newfile.write(response.content)
+ os.chdir(file_path)
+ return "downloaded"
+ else:
+ os.chdir(file_path)
+ return None
+ except Exception as error:
+ print(f"An error occurred downloading the file: {error}")
+ os.chdir(file_path)
+ return None
+
+ elif "cdn.discordapp.com" in url:
+ file = requests.get(url)
+ os.chdir(zips_path)
+ if file.status_code == 200:
+ name = url.split("/")
+ with open(os.path.join(name[-1]), "wb") as newfile:
+ newfile.write(file.content)
+ else:
+ return None
+ elif "/blob/" in url or "/resolve/" in url:
+ os.chdir(zips_path)
+ if "/blob/" in url:
+ url = url.replace("/blob/", "/resolve/")
+
+ response = requests.get(url, stream=True)
+ if response.status_code == 200:
+ content_disposition = six.moves.urllib_parse.unquote(
+ response.headers["Content-Disposition"]
+ )
+ m = re.search(r'filename="([^"]+)"', content_disposition)
+ file_name = m.groups()[0]
+ file_name = file_name.replace(os.path.sep, "_")
+ total_size_in_bytes = int(response.headers.get("content-length", 0))
+ block_size = 1024
+ progress_bar_length = 50
+ progress = 0
+
+ with open(os.path.join(zips_path, file_name), "wb") as file:
+ for data in response.iter_content(block_size):
+ file.write(data)
+ progress += len(data)
+ progress_percent = int((progress / total_size_in_bytes) * 100)
+ num_dots = int(
+ (progress / total_size_in_bytes) * progress_bar_length
+ )
+ progress_bar = (
+ "["
+ + "." * num_dots
+ + " " * (progress_bar_length - num_dots)
+ + "]"
+ )
+ print(
+ f"{progress_percent}% {progress_bar} {progress}/{total_size_in_bytes} ",
+ end="\r",
+ )
+ if progress_percent == 100:
+ print("\n")
+
+ else:
+ os.chdir(now_dir)
+ return None
+ elif "/tree/main" in url:
+ os.chdir(zips_path)
+ response = requests.get(url)
+ soup = BeautifulSoup(response.content, "html.parser")
+ temp_url = ""
+ for link in soup.find_all("a", href=True):
+ if link["href"].endswith(".zip"):
+ temp_url = link["href"]
+ break
+ if temp_url:
+ url = temp_url
+ url = url.replace("blob", "resolve")
+ if "huggingface.co" not in url:
+ url = "https://huggingface.co" + url
+
+ wget.download(url)
+ else:
+ os.chdir(now_dir)
+ return None
+ elif "applio.org" in url:
+ parts = url.split("/")
+ id_with_query = parts[-1]
+ id_parts = id_with_query.split("?")
+ id_number = id_parts[0]
+
+ url = "https://cjtfqzjfdimgpvpwhzlv.supabase.co/rest/v1/models"
+ headers = {
+ "apikey": "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpc3MiOiJzdXBhYmFzZSIsInJlZiI6ImNqdGZxempmZGltZ3B2cHdoemx2Iiwicm9sZSI6ImFub24iLCJpYXQiOjE2OTUxNjczODgsImV4cCI6MjAxMDc0MzM4OH0.7z5WMIbjR99c2Ooc0ma7B_FyGq10G8X-alkCYTkKR10"
+ }
+
+ params = {"id": f"eq.{id_number}"}
+ response = requests.get(url, headers=headers, params=params)
+ if response.status_code == 200:
+ json_response = response.json()
+ print(json_response)
+ if json_response:
+ link = json_response[0]["link"]
+ verify = download_from_url(link)
+ if verify == "downloaded":
+ return "downloaded"
+ else:
+ return None
+ else:
+ return None
+ else:
+ try:
+ os.chdir(zips_path)
+ wget.download(url)
+ except Exception as error:
+ os.chdir(now_dir)
+ print(f"An error occurred downloading the file: {error}")
+ return None
+
+ for currentPath, _, zipFiles in os.walk(zips_path):
+ for Files in zipFiles:
+ filePart = Files.split(".")
+ extensionFile = filePart[len(filePart) - 1]
+ filePart.pop()
+ nameFile = "_".join(filePart)
+ realPath = os.path.join(currentPath, Files)
+ os.rename(realPath, nameFile + "." + extensionFile)
+
+ os.chdir(now_dir)
+ return "downloaded"
+
+ os.chdir(now_dir)
+ return None
+
+
+def extract_and_show_progress(zipfile_path, unzips_path):
+ try:
+ with zipfile.ZipFile(zipfile_path, "r") as zip_ref:
+ for file_info in zip_ref.infolist():
+ zip_ref.extract(file_info, unzips_path)
+ os.remove(zipfile_path)
+ return True
+ except Exception as error:
+ print(f"An error occurred extracting the zip file: {error}")
+ return False
+
+
+def unzip_file(zip_path, zip_file_name):
+ zip_file_path = os.path.join(zip_path, zip_file_name + ".zip")
+ extract_path = os.path.join(file_path, zip_file_name)
+ with zipfile.ZipFile(zip_file_path, "r") as zip_ref:
+ zip_ref.extractall(extract_path)
+ os.remove(zip_file_path)
+
+
+def model_download_pipeline(url: str):
+ try:
+ verify = download_from_url(url)
+ if verify == "downloaded":
+ extract_folder_path = ""
+ for filename in os.listdir(zips_path):
+ if filename.endswith(".zip"):
+ zipfile_path = os.path.join(zips_path, filename)
+ print("Proceeding with the extraction...")
+
+ model_zip = os.path.basename(zipfile_path)
+ model_name = format_title(model_zip.split(".zip")[0])
+ extract_folder_path = os.path.join(
+ "logs",
+ os.path.normpath(model_name),
+ )
+ success = extract_and_show_progress(
+ zipfile_path, extract_folder_path
+ )
+
+ macosx_path = os.path.join(extract_folder_path, "__MACOSX")
+ if os.path.exists(macosx_path):
+ shutil.rmtree(macosx_path)
+
+ subfolders = [
+ f
+ for f in os.listdir(extract_folder_path)
+ if os.path.isdir(os.path.join(extract_folder_path, f))
+ ]
+ if len(subfolders) == 1:
+ subfolder_path = os.path.join(
+ extract_folder_path, subfolders[0]
+ )
+ for item in os.listdir(subfolder_path):
+ s = os.path.join(subfolder_path, item)
+ d = os.path.join(extract_folder_path, item)
+ shutil.move(s, d)
+ os.rmdir(subfolder_path)
+
+ for item in os.listdir(extract_folder_path):
+ if ".pth" in item:
+ file_name = item.split(".pth")[0]
+ if file_name != model_name:
+ os.rename(
+ os.path.join(extract_folder_path, item),
+ os.path.join(
+ extract_folder_path, model_name + ".pth"
+ ),
+ )
+ else:
+ if "v2" not in item:
+ if "_nprobe_1_" in item and "_v1" in item:
+ file_name = item.split("_nprobe_1_")[1].split(
+ "_v1"
+ )[0]
+ if file_name != model_name:
+ new_file_name = (
+ item.split("_nprobe_1_")[0]
+ + "_nprobe_1_"
+ + model_name
+ + "_v1"
+ )
+ os.rename(
+ os.path.join(extract_folder_path, item),
+ os.path.join(
+ extract_folder_path,
+ new_file_name + ".index",
+ ),
+ )
+ else:
+ if "_nprobe_1_" in item and "_v2" in item:
+ file_name = item.split("_nprobe_1_")[1].split(
+ "_v2"
+ )[0]
+ if file_name != model_name:
+ new_file_name = (
+ item.split("_nprobe_1_")[0]
+ + "_nprobe_1_"
+ + model_name
+ + "_v2"
+ )
+ os.rename(
+ os.path.join(extract_folder_path, item),
+ os.path.join(
+ extract_folder_path,
+ new_file_name + ".index",
+ ),
+ )
+
+ if success:
+ print(f"Model {model_name} downloaded!")
+ else:
+ print(f"Error downloading {model_name}")
+ return "Error"
+ if extract_folder_path == "":
+ print("Zip file was not found.")
+ return "Error"
+ result = search_pth_index(extract_folder_path)
+ return result
+ else:
+ return "Error"
+ except Exception as error:
+ print(f"An unexpected error occurred: {error}")
+ return "Error"
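+
+
+# A minimal usage sketch; the URL is a placeholder for any host handled by
+# download_from_url (Google Drive, Hugging Face, pixeldrain, ...).
+if __name__ == "__main__":
+    result = model_download_pipeline(
+        "https://huggingface.co/user/model/resolve/main/model.zip"
+    )
+    if result != "Error":
+        pth_paths, index_paths = result
+        print(pth_paths, index_paths)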
diff --git a/rvc/lib/tools/prerequisites_download.py b/rvc/lib/tools/prerequisites_download.py
new file mode 100644
index 0000000000000000000000000000000000000000..e38356212dedbc54bc4f964abbcd357ba5e19e10
--- /dev/null
+++ b/rvc/lib/tools/prerequisites_download.py
@@ -0,0 +1,160 @@
+import os
+from concurrent.futures import ThreadPoolExecutor
+from tqdm import tqdm
+import requests
+
+url_base = "https://huggingface.co/IAHispano/Applio/resolve/main/Resources"
+
+pretraineds_v1_list = [
+ (
+ "pretrained_v1/",
+ [
+ "D32k.pth",
+ "D40k.pth",
+ "D48k.pth",
+ "G32k.pth",
+ "G40k.pth",
+ "G48k.pth",
+ "f0D32k.pth",
+ "f0D40k.pth",
+ "f0D48k.pth",
+ "f0G32k.pth",
+ "f0G40k.pth",
+ "f0G48k.pth",
+ ],
+ )
+]
+pretraineds_v2_list = [
+ (
+ "pretrained_v2/",
+ [
+ "D32k.pth",
+ "D40k.pth",
+ "D48k.pth",
+ "G32k.pth",
+ "G40k.pth",
+ "G48k.pth",
+ "f0D32k.pth",
+ "f0D40k.pth",
+ "f0D48k.pth",
+ "f0G32k.pth",
+ "f0G40k.pth",
+ "f0G48k.pth",
+ ],
+ )
+]
+models_list = [("predictors/", ["rmvpe.pt", "fcpe.pt"])]
+embedders_list = [("embedders/contentvec/", ["pytorch_model.bin", "config.json"])]
+linux_executables_list = [("formant/", ["stftpitchshift"])]
+executables_list = [
+ ("", ["ffmpeg.exe", "ffprobe.exe"]),
+ ("formant/", ["stftpitchshift.exe"]),
+]
+
+folder_mapping_list = {
+ "pretrained_v1/": "rvc/models/pretraineds/pretrained_v1/",
+ "pretrained_v2/": "rvc/models/pretraineds/pretrained_v2/",
+ "embedders/contentvec/": "rvc/models/embedders/contentvec/",
+ "predictors/": "rvc/models/predictors/",
+ "formant/": "rvc/models/formant/",
+}
+
+
+def get_file_size_if_missing(file_list):
+ """
+ Calculate the total size of files to be downloaded only if they do not exist locally.
+ """
+ total_size = 0
+ for remote_folder, files in file_list:
+ local_folder = folder_mapping_list.get(remote_folder, "")
+ for file in files:
+ destination_path = os.path.join(local_folder, file)
+ if not os.path.exists(destination_path):
+ url = f"{url_base}/{remote_folder}{file}"
+ response = requests.head(url)
+ total_size += int(response.headers.get("content-length", 0))
+ return total_size
+
+
+def download_file(url, destination_path, global_bar):
+ """
+ Download a file from the given URL to the specified destination path,
+ updating the global progress bar as data is downloaded.
+ """
+
+ dir_name = os.path.dirname(destination_path)
+ if dir_name:
+ os.makedirs(dir_name, exist_ok=True)
+ response = requests.get(url, stream=True)
+ block_size = 1024
+ with open(destination_path, "wb") as file:
+ for data in response.iter_content(block_size):
+ file.write(data)
+ global_bar.update(len(data))
+
+
+def download_mapping_files(file_mapping_list, global_bar):
+ """
+ Download all files in the provided file mapping list using a thread pool executor,
+ and update the global progress bar as downloads progress.
+ """
+ with ThreadPoolExecutor() as executor:
+ futures = []
+ for remote_folder, file_list in file_mapping_list:
+ local_folder = folder_mapping_list.get(remote_folder, "")
+ for file in file_list:
+ destination_path = os.path.join(local_folder, file)
+ if not os.path.exists(destination_path):
+ url = f"{url_base}/{remote_folder}{file}"
+ futures.append(
+ executor.submit(
+ download_file, url, destination_path, global_bar
+ )
+ )
+ for future in futures:
+ future.result()
+
+
+def calculate_total_size(pretraineds_v1, pretraineds_v2, models, exe):
+ """
+ Calculate the total size of all files to be downloaded based on selected categories.
+ """
+ total_size = 0
+ if models:
+ total_size += get_file_size_if_missing(models_list)
+ total_size += get_file_size_if_missing(embedders_list)
+ if exe:
+ total_size += get_file_size_if_missing(
+ executables_list if os.name == "nt" else linux_executables_list
+ )
+ if pretraineds_v1:
+ total_size += get_file_size_if_missing(pretraineds_v1_list)
+ if pretraineds_v2:
+ total_size += get_file_size_if_missing(pretraineds_v2_list)
+ return total_size
+
+
+def prequisites_download_pipeline(pretraineds_v1, pretraineds_v2, models, exe):
+ """
+ Manage the download pipeline for different categories of files.
+ """
+ total_size = calculate_total_size(pretraineds_v1, pretraineds_v2, models, exe)
+
+ if total_size > 0:
+ with tqdm(
+ total=total_size, unit="iB", unit_scale=True, desc="Downloading all files"
+ ) as global_bar:
+ if models:
+ download_mapping_files(models_list, global_bar)
+ download_mapping_files(embedders_list, global_bar)
+ if exe:
+ download_mapping_files(
+ executables_list if os.name == "nt" else linux_executables_list,
+ global_bar,
+ )
+ if pretraineds_v1:
+ download_mapping_files(pretraineds_v1_list, global_bar)
+ if pretraineds_v2:
+ download_mapping_files(pretraineds_v2_list, global_bar)
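+
+
+# A minimal usage sketch: fetch only the predictors/embedders and the platform
+# executables, skipping both pretrained sets.
+if __name__ == "__main__":
+    prequisites_download_pipeline(
+        pretraineds_v1=False, pretraineds_v2=False, models=True, exe=True
+    )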
diff --git a/rvc/lib/tools/pretrained_selector.py b/rvc/lib/tools/pretrained_selector.py
new file mode 100644
index 0000000000000000000000000000000000000000..e982fac5078df43b3477f3e3d478a76ff85488c5
--- /dev/null
+++ b/rvc/lib/tools/pretrained_selector.py
@@ -0,0 +1,63 @@
+def pretrained_selector(pitch_guidance):
+    if pitch_guidance:
+ return {
+ "v1": {
+ 32000: (
+ "rvc/models/pretraineds/pretrained_v1/f0G32k.pth",
+ "rvc/models/pretraineds/pretrained_v1/f0D32k.pth",
+ ),
+ 40000: (
+ "rvc/models/pretraineds/pretrained_v1/f0G40k.pth",
+ "rvc/models/pretraineds/pretrained_v1/f0D40k.pth",
+ ),
+ 48000: (
+ "rvc/models/pretraineds/pretrained_v1/f0G48k.pth",
+ "rvc/models/pretraineds/pretrained_v1/f0D48k.pth",
+ ),
+ },
+ "v2": {
+ 32000: (
+ "rvc/models/pretraineds/pretrained_v2/f0G32k.pth",
+ "rvc/models/pretraineds/pretrained_v2/f0D32k.pth",
+ ),
+ 40000: (
+ "rvc/models/pretraineds/pretrained_v2/f0G40k.pth",
+ "rvc/models/pretraineds/pretrained_v2/f0D40k.pth",
+ ),
+ 48000: (
+ "rvc/models/pretraineds/pretrained_v2/f0G48k.pth",
+ "rvc/models/pretraineds/pretrained_v2/f0D48k.pth",
+ ),
+ },
+ }
+    else:
+ return {
+ "v1": {
+ 32000: (
+ "rvc/models/pretraineds/pretrained_v1/G32k.pth",
+ "rvc/models/pretraineds/pretrained_v1/D32k.pth",
+ ),
+ 40000: (
+ "rvc/models/pretraineds/pretrained_v1/G40k.pth",
+ "rvc/models/pretraineds/pretrained_v1/D40k.pth",
+ ),
+ 48000: (
+ "rvc/models/pretraineds/pretrained_v1/G48k.pth",
+ "rvc/models/pretraineds/pretrained_v1/D48k.pth",
+ ),
+ },
+ "v2": {
+ 32000: (
+ "rvc/models/pretraineds/pretrained_v2/G32k.pth",
+ "rvc/models/pretraineds/pretrained_v2/D32k.pth",
+ ),
+ 40000: (
+ "rvc/models/pretraineds/pretrained_v2/G40k.pth",
+ "rvc/models/pretraineds/pretrained_v2/D40k.pth",
+ ),
+ 48000: (
+ "rvc/models/pretraineds/pretrained_v2/G48k.pth",
+ "rvc/models/pretraineds/pretrained_v2/D48k.pth",
+ ),
+ },
+ }
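+
+
+# A minimal usage sketch: look up the generator/discriminator pair for a v2
+# model trained at 40 kHz with pitch guidance.
+if __name__ == "__main__":
+    paths = pretrained_selector(True)
+    generator_path, discriminator_path = paths["v2"][40000]
+    print(generator_path, discriminator_path)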
diff --git a/rvc/lib/tools/split_audio.py b/rvc/lib/tools/split_audio.py
new file mode 100644
index 0000000000000000000000000000000000000000..a946cf52a84613a62843198196001bf40119bc3f
--- /dev/null
+++ b/rvc/lib/tools/split_audio.py
@@ -0,0 +1,107 @@
+from pydub.silence import detect_nonsilent
+from pydub import AudioSegment
+import numpy as np
+import re
+import os
+
+from rvc.lib.utils import format_title
+
+
+def process_audio(file_path):
+ try:
+ # load audio file
+ song = AudioSegment.from_file(file_path)
+
+ # set silence threshold and duration
+ silence_thresh = -70 # dB
+ min_silence_len = 750 # ms, adjust as needed
+
+ # detect nonsilent parts
+ nonsilent_parts = detect_nonsilent(
+ song, min_silence_len=min_silence_len, silence_thresh=silence_thresh
+ )
+
+ # Create a new directory to store chunks
+ file_dir = os.path.dirname(file_path)
+ file_name = os.path.basename(file_path).split(".")[0]
+ file_name = format_title(file_name)
+ new_dir_path = os.path.join(file_dir, file_name)
+ os.makedirs(new_dir_path, exist_ok=True)
+
+ # Check if timestamps file exists, if so delete it
+ timestamps_file = os.path.join(file_dir, f"{file_name}_timestamps.txt")
+ if os.path.isfile(timestamps_file):
+ os.remove(timestamps_file)
+
+ # export chunks and save start times
+ segment_count = 0
+ for i, (start_i, end_i) in enumerate(nonsilent_parts):
+ chunk = song[start_i:end_i]
+ chunk_file_path = os.path.join(new_dir_path, f"chunk{i}.wav")
+ chunk.export(chunk_file_path, format="wav")
+
+ print(f"Segment {i} created!")
+ segment_count += 1
+
+ # write start times to file
+ with open(timestamps_file, "a", encoding="utf-8") as f:
+ f.write(f"{chunk_file_path} starts at {start_i} ms\n")
+
+ print(f"Total segments created: {segment_count}")
+ print(f"Split all chunks for {file_path} successfully!")
+
+ return "Finish", new_dir_path
+
+ except Exception as error:
+ print(f"An error occurred splitting the audio: {error}")
+ return "Error", None
+
+
+def merge_audio(timestamps_file):
+ try:
+ # Extract prefix from the timestamps filename
+ prefix = os.path.basename(timestamps_file).replace("_timestamps.txt", "")
+ timestamps_dir = os.path.dirname(timestamps_file)
+
+ # Open the timestamps file
+ with open(timestamps_file, "r", encoding="utf-8") as f:
+ lines = f.readlines()
+
+ # Initialize empty list to hold audio segments
+ audio_segments = []
+ last_end_time = 0
+
+ print(f"Processing file: {timestamps_file}")
+
+ for line in lines:
+ # Extract filename and start time from line
+            match = re.search(r"(chunk\d+\.wav) starts at (\d+) ms", line)
+ if match:
+ filename, start_time = match.groups()
+ start_time = int(start_time)
+
+ # Construct the complete path to the chunk file
+ chunk_file = os.path.join(timestamps_dir, prefix, filename)
+
+ # Add silence from last_end_time to start_time
+ silence_duration = max(start_time - last_end_time, 0)
+ silence = AudioSegment.silent(duration=silence_duration)
+ audio_segments.append(silence)
+
+ # Load audio file and append to list
+ audio = AudioSegment.from_wav(chunk_file)
+ audio_segments.append(audio)
+
+ # Update last_end_time
+ last_end_time = start_time + len(audio)
+
+ print(f"Processed chunk: {chunk_file}")
+
+ # Concatenate all audio_segments and export
+ merged_audio = sum(audio_segments)
+ merged_audio_np = np.array(merged_audio.get_array_of_samples())
+ # print(f"Exported merged file: {merged_filename}\n")
+ return merged_audio.frame_rate, merged_audio_np
+
+    except Exception as error:
+        print(f"An error occurred merging the audio: {error}")
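+
+
+# A minimal usage sketch; "input.wav" is a placeholder. process_audio writes
+# <name>_timestamps.txt next to the chunk folder, which merge_audio reads back.
+if __name__ == "__main__":
+    status, chunk_dir = process_audio("input.wav")
+    if status == "Finish":
+        frame_rate, samples = merge_audio(f"{chunk_dir}_timestamps.txt")
+        print(frame_rate, samples.shape)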
diff --git a/rvc/lib/tools/tts.py b/rvc/lib/tools/tts.py
new file mode 100644
index 0000000000000000000000000000000000000000..a9994dbd0db708581a6fc1fc53a8b24261711d69
--- /dev/null
+++ b/rvc/lib/tools/tts.py
@@ -0,0 +1,20 @@
+import sys
+import asyncio
+import edge_tts
+
+
+async def main():
+ # Parse command line arguments
+ text = str(sys.argv[1])
+ voice = str(sys.argv[2])
+ rate = int(sys.argv[3])
+ output_file = str(sys.argv[4])
+
+ rates = f"+{rate}%" if rate >= 0 else f"{rate}%"
+
+ await edge_tts.Communicate(text, voice, rate=rates).save(output_file)
+ print(f"TTS with {voice} completed. Output TTS file: '{output_file}'")
+
+
+if __name__ == "__main__":
+ asyncio.run(main())
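+
+# Example invocation (the voice must be a ShortName from tts_voices.json):
+#     python rvc/lib/tools/tts.py "Hello world" en-US-AriaNeural 0 output.mp3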
diff --git a/rvc/lib/tools/tts_voices.json b/rvc/lib/tools/tts_voices.json
new file mode 100644
index 0000000000000000000000000000000000000000..b76cf447ccfacff86e844360caeac6c8e0b27e95
--- /dev/null
+++ b/rvc/lib/tools/tts_voices.json
@@ -0,0 +1,5748 @@
+[
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (af-ZA, AdriNeural)",
+ "ShortName": "af-ZA-AdriNeural",
+ "Gender": "Female",
+ "Locale": "af-ZA",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Adri Online (Natural) - Afrikaans (South Africa)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (af-ZA, WillemNeural)",
+ "ShortName": "af-ZA-WillemNeural",
+ "Gender": "Male",
+ "Locale": "af-ZA",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Willem Online (Natural) - Afrikaans (South Africa)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (sq-AL, AnilaNeural)",
+ "ShortName": "sq-AL-AnilaNeural",
+ "Gender": "Female",
+ "Locale": "sq-AL",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Anila Online (Natural) - Albanian (Albania)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (sq-AL, IlirNeural)",
+ "ShortName": "sq-AL-IlirNeural",
+ "Gender": "Male",
+ "Locale": "sq-AL",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Ilir Online (Natural) - Albanian (Albania)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (am-ET, AmehaNeural)",
+ "ShortName": "am-ET-AmehaNeural",
+ "Gender": "Male",
+ "Locale": "am-ET",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Ameha Online (Natural) - Amharic (Ethiopia)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (am-ET, MekdesNeural)",
+ "ShortName": "am-ET-MekdesNeural",
+ "Gender": "Female",
+ "Locale": "am-ET",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Mekdes Online (Natural) - Amharic (Ethiopia)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (ar-DZ, AminaNeural)",
+ "ShortName": "ar-DZ-AminaNeural",
+ "Gender": "Female",
+ "Locale": "ar-DZ",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Amina Online (Natural) - Arabic (Algeria)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (ar-DZ, IsmaelNeural)",
+ "ShortName": "ar-DZ-IsmaelNeural",
+ "Gender": "Male",
+ "Locale": "ar-DZ",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Ismael Online (Natural) - Arabic (Algeria)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (ar-BH, AliNeural)",
+ "ShortName": "ar-BH-AliNeural",
+ "Gender": "Male",
+ "Locale": "ar-BH",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Ali Online (Natural) - Arabic (Bahrain)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (ar-BH, LailaNeural)",
+ "ShortName": "ar-BH-LailaNeural",
+ "Gender": "Female",
+ "Locale": "ar-BH",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Laila Online (Natural) - Arabic (Bahrain)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (ar-EG, SalmaNeural)",
+ "ShortName": "ar-EG-SalmaNeural",
+ "Gender": "Female",
+ "Locale": "ar-EG",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Salma Online (Natural) - Arabic (Egypt)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (ar-EG, ShakirNeural)",
+ "ShortName": "ar-EG-ShakirNeural",
+ "Gender": "Male",
+ "Locale": "ar-EG",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Shakir Online (Natural) - Arabic (Egypt)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (ar-IQ, BasselNeural)",
+ "ShortName": "ar-IQ-BasselNeural",
+ "Gender": "Male",
+ "Locale": "ar-IQ",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Bassel Online (Natural) - Arabic (Iraq)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (ar-IQ, RanaNeural)",
+ "ShortName": "ar-IQ-RanaNeural",
+ "Gender": "Female",
+ "Locale": "ar-IQ",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Rana Online (Natural) - Arabic (Iraq)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (ar-JO, SanaNeural)",
+ "ShortName": "ar-JO-SanaNeural",
+ "Gender": "Female",
+ "Locale": "ar-JO",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Sana Online (Natural) - Arabic (Jordan)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (ar-JO, TaimNeural)",
+ "ShortName": "ar-JO-TaimNeural",
+ "Gender": "Male",
+ "Locale": "ar-JO",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Taim Online (Natural) - Arabic (Jordan)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (ar-KW, FahedNeural)",
+ "ShortName": "ar-KW-FahedNeural",
+ "Gender": "Male",
+ "Locale": "ar-KW",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Fahed Online (Natural) - Arabic (Kuwait)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (ar-KW, NouraNeural)",
+ "ShortName": "ar-KW-NouraNeural",
+ "Gender": "Female",
+ "Locale": "ar-KW",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Noura Online (Natural) - Arabic (Kuwait)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (ar-LB, LaylaNeural)",
+ "ShortName": "ar-LB-LaylaNeural",
+ "Gender": "Female",
+ "Locale": "ar-LB",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Layla Online (Natural) - Arabic (Lebanon)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (ar-LB, RamiNeural)",
+ "ShortName": "ar-LB-RamiNeural",
+ "Gender": "Male",
+ "Locale": "ar-LB",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Rami Online (Natural) - Arabic (Lebanon)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (ar-LY, ImanNeural)",
+ "ShortName": "ar-LY-ImanNeural",
+ "Gender": "Female",
+ "Locale": "ar-LY",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Iman Online (Natural) - Arabic (Libya)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (ar-LY, OmarNeural)",
+ "ShortName": "ar-LY-OmarNeural",
+ "Gender": "Male",
+ "Locale": "ar-LY",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Omar Online (Natural) - Arabic (Libya)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (ar-MA, JamalNeural)",
+ "ShortName": "ar-MA-JamalNeural",
+ "Gender": "Male",
+ "Locale": "ar-MA",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Jamal Online (Natural) - Arabic (Morocco)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (ar-MA, MounaNeural)",
+ "ShortName": "ar-MA-MounaNeural",
+ "Gender": "Female",
+ "Locale": "ar-MA",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Mouna Online (Natural) - Arabic (Morocco)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (ar-OM, AbdullahNeural)",
+ "ShortName": "ar-OM-AbdullahNeural",
+ "Gender": "Male",
+ "Locale": "ar-OM",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Abdullah Online (Natural) - Arabic (Oman)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (ar-OM, AyshaNeural)",
+ "ShortName": "ar-OM-AyshaNeural",
+ "Gender": "Female",
+ "Locale": "ar-OM",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Aysha Online (Natural) - Arabic (Oman)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (ar-QA, AmalNeural)",
+ "ShortName": "ar-QA-AmalNeural",
+ "Gender": "Female",
+ "Locale": "ar-QA",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Amal Online (Natural) - Arabic (Qatar)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (ar-QA, MoazNeural)",
+ "ShortName": "ar-QA-MoazNeural",
+ "Gender": "Male",
+ "Locale": "ar-QA",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Moaz Online (Natural) - Arabic (Qatar)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (ar-SA, HamedNeural)",
+ "ShortName": "ar-SA-HamedNeural",
+ "Gender": "Male",
+ "Locale": "ar-SA",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Hamed Online (Natural) - Arabic (Saudi Arabia)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (ar-SA, ZariyahNeural)",
+ "ShortName": "ar-SA-ZariyahNeural",
+ "Gender": "Female",
+ "Locale": "ar-SA",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Zariyah Online (Natural) - Arabic (Saudi Arabia)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (ar-SY, AmanyNeural)",
+ "ShortName": "ar-SY-AmanyNeural",
+ "Gender": "Female",
+ "Locale": "ar-SY",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Amany Online (Natural) - Arabic (Syria)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (ar-SY, LaithNeural)",
+ "ShortName": "ar-SY-LaithNeural",
+ "Gender": "Male",
+ "Locale": "ar-SY",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Laith Online (Natural) - Arabic (Syria)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (ar-TN, HediNeural)",
+ "ShortName": "ar-TN-HediNeural",
+ "Gender": "Male",
+ "Locale": "ar-TN",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Hedi Online (Natural) - Arabic (Tunisia)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (ar-TN, ReemNeural)",
+ "ShortName": "ar-TN-ReemNeural",
+ "Gender": "Female",
+ "Locale": "ar-TN",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Reem Online (Natural) - Arabic (Tunisia)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (ar-AE, FatimaNeural)",
+ "ShortName": "ar-AE-FatimaNeural",
+ "Gender": "Female",
+ "Locale": "ar-AE",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Fatima Online (Natural) - Arabic (United Arab Emirates)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (ar-AE, HamdanNeural)",
+ "ShortName": "ar-AE-HamdanNeural",
+ "Gender": "Male",
+ "Locale": "ar-AE",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Hamdan Online (Natural) - Arabic (United Arab Emirates)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (ar-YE, MaryamNeural)",
+ "ShortName": "ar-YE-MaryamNeural",
+ "Gender": "Female",
+ "Locale": "ar-YE",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Maryam Online (Natural) - Arabic (Yemen)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (ar-YE, SalehNeural)",
+ "ShortName": "ar-YE-SalehNeural",
+ "Gender": "Male",
+ "Locale": "ar-YE",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Saleh Online (Natural) - Arabic (Yemen)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (az-AZ, BabekNeural)",
+ "ShortName": "az-AZ-BabekNeural",
+ "Gender": "Male",
+ "Locale": "az-AZ",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Babek Online (Natural) - Azerbaijani (Azerbaijan)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (az-AZ, BanuNeural)",
+ "ShortName": "az-AZ-BanuNeural",
+ "Gender": "Female",
+ "Locale": "az-AZ",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Banu Online (Natural) - Azerbaijani (Azerbaijan)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (bn-BD, NabanitaNeural)",
+ "ShortName": "bn-BD-NabanitaNeural",
+ "Gender": "Female",
+ "Locale": "bn-BD",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Nabanita Online (Natural) - Bangla (Bangladesh)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (bn-BD, PradeepNeural)",
+ "ShortName": "bn-BD-PradeepNeural",
+ "Gender": "Male",
+ "Locale": "bn-BD",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Pradeep Online (Natural) - Bangla (Bangladesh)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (bn-IN, BashkarNeural)",
+ "ShortName": "bn-IN-BashkarNeural",
+ "Gender": "Male",
+ "Locale": "bn-IN",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Bashkar Online (Natural) - Bangla (India)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (bn-IN, TanishaaNeural)",
+ "ShortName": "bn-IN-TanishaaNeural",
+ "Gender": "Female",
+ "Locale": "bn-IN",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Tanishaa Online (Natural) - Bengali (India)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (bs-BA, GoranNeural)",
+ "ShortName": "bs-BA-GoranNeural",
+ "Gender": "Male",
+ "Locale": "bs-BA",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Goran Online (Natural) - Bosnian (Bosnia)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (bs-BA, VesnaNeural)",
+ "ShortName": "bs-BA-VesnaNeural",
+ "Gender": "Female",
+ "Locale": "bs-BA",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Vesna Online (Natural) - Bosnian (Bosnia)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (bg-BG, BorislavNeural)",
+ "ShortName": "bg-BG-BorislavNeural",
+ "Gender": "Male",
+ "Locale": "bg-BG",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Borislav Online (Natural) - Bulgarian (Bulgaria)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (bg-BG, KalinaNeural)",
+ "ShortName": "bg-BG-KalinaNeural",
+ "Gender": "Female",
+ "Locale": "bg-BG",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Kalina Online (Natural) - Bulgarian (Bulgaria)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (my-MM, NilarNeural)",
+ "ShortName": "my-MM-NilarNeural",
+ "Gender": "Female",
+ "Locale": "my-MM",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Nilar Online (Natural) - Burmese (Myanmar)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (my-MM, ThihaNeural)",
+ "ShortName": "my-MM-ThihaNeural",
+ "Gender": "Male",
+ "Locale": "my-MM",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Thiha Online (Natural) - Burmese (Myanmar)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (ca-ES, EnricNeural)",
+ "ShortName": "ca-ES-EnricNeural",
+ "Gender": "Male",
+ "Locale": "ca-ES",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Enric Online (Natural) - Catalan (Spain)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (ca-ES, JoanaNeural)",
+ "ShortName": "ca-ES-JoanaNeural",
+ "Gender": "Female",
+ "Locale": "ca-ES",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Joana Online (Natural) - Catalan (Spain)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (zh-HK, HiuGaaiNeural)",
+ "ShortName": "zh-HK-HiuGaaiNeural",
+ "Gender": "Female",
+ "Locale": "zh-HK",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft HiuGaai Online (Natural) - Chinese (Cantonese Traditional)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (zh-HK, HiuMaanNeural)",
+ "ShortName": "zh-HK-HiuMaanNeural",
+ "Gender": "Female",
+ "Locale": "zh-HK",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft HiuMaan Online (Natural) - Chinese (Hong Kong)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (zh-HK, WanLungNeural)",
+ "ShortName": "zh-HK-WanLungNeural",
+ "Gender": "Male",
+ "Locale": "zh-HK",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft WanLung Online (Natural) - Chinese (Hong Kong)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (zh-CN, XiaoxiaoNeural)",
+ "ShortName": "zh-CN-XiaoxiaoNeural",
+ "Gender": "Female",
+ "Locale": "zh-CN",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Xiaoxiao Online (Natural) - Chinese (Mainland)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "News",
+ "Novel"
+ ],
+ "VoicePersonalities": [
+ "Warm"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (zh-CN, XiaoyiNeural)",
+ "ShortName": "zh-CN-XiaoyiNeural",
+ "Gender": "Female",
+ "Locale": "zh-CN",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Xiaoyi Online (Natural) - Chinese (Mainland)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "Cartoon",
+ "Novel"
+ ],
+ "VoicePersonalities": [
+ "Lively"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (zh-CN, YunjianNeural)",
+ "ShortName": "zh-CN-YunjianNeural",
+ "Gender": "Male",
+ "Locale": "zh-CN",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Yunjian Online (Natural) - Chinese (Mainland)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "Sports",
+                "Novel"
+ ],
+ "VoicePersonalities": [
+ "Passion"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (zh-CN, YunxiNeural)",
+ "ShortName": "zh-CN-YunxiNeural",
+ "Gender": "Male",
+ "Locale": "zh-CN",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Yunxi Online (Natural) - Chinese (Mainland)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "Novel"
+ ],
+ "VoicePersonalities": [
+ "Lively",
+ "Sunshine"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (zh-CN, YunxiaNeural)",
+ "ShortName": "zh-CN-YunxiaNeural",
+ "Gender": "Male",
+ "Locale": "zh-CN",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Yunxia Online (Natural) - Chinese (Mainland)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "Cartoon",
+ "Novel"
+ ],
+ "VoicePersonalities": [
+ "Cute"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (zh-CN, YunyangNeural)",
+ "ShortName": "zh-CN-YunyangNeural",
+ "Gender": "Male",
+ "Locale": "zh-CN",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Yunyang Online (Natural) - Chinese (Mainland)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "News"
+ ],
+ "VoicePersonalities": [
+ "Professional",
+ "Reliable"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (zh-CN-liaoning, XiaobeiNeural)",
+ "ShortName": "zh-CN-liaoning-XiaobeiNeural",
+ "Gender": "Female",
+ "Locale": "zh-CN-liaoning",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Xiaobei Online (Natural) - Chinese (Northeastern Mandarin)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "Dialect"
+ ],
+ "VoicePersonalities": [
+ "Humorous"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (zh-TW, HsiaoChenNeural)",
+ "ShortName": "zh-TW-HsiaoChenNeural",
+ "Gender": "Female",
+ "Locale": "zh-TW",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft HsiaoChen Online (Natural) - Chinese (Taiwan)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (zh-TW, YunJheNeural)",
+ "ShortName": "zh-TW-YunJheNeural",
+ "Gender": "Male",
+ "Locale": "zh-TW",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft YunJhe Online (Natural) - Chinese (Taiwan)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (zh-TW, HsiaoYuNeural)",
+ "ShortName": "zh-TW-HsiaoYuNeural",
+ "Gender": "Female",
+ "Locale": "zh-TW",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft HsiaoYu Online (Natural) - Chinese (Taiwanese Mandarin)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (zh-CN-shaanxi, XiaoniNeural)",
+ "ShortName": "zh-CN-shaanxi-XiaoniNeural",
+ "Gender": "Female",
+ "Locale": "zh-CN-shaanxi",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Xiaoni Online (Natural) - Chinese (Zhongyuan Mandarin Shaanxi)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "Dialect"
+ ],
+ "VoicePersonalities": [
+ "Bright"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (hr-HR, GabrijelaNeural)",
+ "ShortName": "hr-HR-GabrijelaNeural",
+ "Gender": "Female",
+ "Locale": "hr-HR",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Gabrijela Online (Natural) - Croatian (Croatia)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (hr-HR, SreckoNeural)",
+ "ShortName": "hr-HR-SreckoNeural",
+ "Gender": "Male",
+ "Locale": "hr-HR",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Srecko Online (Natural) - Croatian (Croatia)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (cs-CZ, AntoninNeural)",
+ "ShortName": "cs-CZ-AntoninNeural",
+ "Gender": "Male",
+ "Locale": "cs-CZ",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Antonin Online (Natural) - Czech (Czech)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (cs-CZ, VlastaNeural)",
+ "ShortName": "cs-CZ-VlastaNeural",
+ "Gender": "Female",
+ "Locale": "cs-CZ",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Vlasta Online (Natural) - Czech (Czech)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (da-DK, ChristelNeural)",
+ "ShortName": "da-DK-ChristelNeural",
+ "Gender": "Female",
+ "Locale": "da-DK",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Christel Online (Natural) - Danish (Denmark)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (da-DK, JeppeNeural)",
+ "ShortName": "da-DK-JeppeNeural",
+ "Gender": "Male",
+ "Locale": "da-DK",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Jeppe Online (Natural) - Danish (Denmark)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (nl-BE, ArnaudNeural)",
+ "ShortName": "nl-BE-ArnaudNeural",
+ "Gender": "Male",
+ "Locale": "nl-BE",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Arnaud Online (Natural) - Dutch (Belgium)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (nl-BE, DenaNeural)",
+ "ShortName": "nl-BE-DenaNeural",
+ "Gender": "Female",
+ "Locale": "nl-BE",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Dena Online (Natural) - Dutch (Belgium)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (nl-NL, ColetteNeural)",
+ "ShortName": "nl-NL-ColetteNeural",
+ "Gender": "Female",
+ "Locale": "nl-NL",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Colette Online (Natural) - Dutch (Netherlands)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (nl-NL, FennaNeural)",
+ "ShortName": "nl-NL-FennaNeural",
+ "Gender": "Female",
+ "Locale": "nl-NL",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Fenna Online (Natural) - Dutch (Netherlands)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (nl-NL, MaartenNeural)",
+ "ShortName": "nl-NL-MaartenNeural",
+ "Gender": "Male",
+ "Locale": "nl-NL",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Maarten Online (Natural) - Dutch (Netherlands)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (en-AU, NatashaNeural)",
+ "ShortName": "en-AU-NatashaNeural",
+ "Gender": "Female",
+ "Locale": "en-AU",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Natasha Online (Natural) - English (Australia)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (en-AU, WilliamNeural)",
+ "ShortName": "en-AU-WilliamNeural",
+ "Gender": "Male",
+ "Locale": "en-AU",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft William Online (Natural) - English (Australia)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (en-CA, ClaraNeural)",
+ "ShortName": "en-CA-ClaraNeural",
+ "Gender": "Female",
+ "Locale": "en-CA",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Clara Online (Natural) - English (Canada)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (en-CA, LiamNeural)",
+ "ShortName": "en-CA-LiamNeural",
+ "Gender": "Male",
+ "Locale": "en-CA",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Liam Online (Natural) - English (Canada)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (en-HK, SamNeural)",
+ "ShortName": "en-HK-SamNeural",
+ "Gender": "Male",
+ "Locale": "en-HK",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Sam Online (Natural) - English (Hongkong)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (en-HK, YanNeural)",
+ "ShortName": "en-HK-YanNeural",
+ "Gender": "Female",
+ "Locale": "en-HK",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Yan Online (Natural) - English (Hongkong)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (en-IN, NeerjaExpressiveNeural)",
+ "ShortName": "en-IN-NeerjaExpressiveNeural",
+ "Gender": "Female",
+ "Locale": "en-IN",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Neerja Online (Natural) - English (India) (Preview)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (en-IN, NeerjaNeural)",
+ "ShortName": "en-IN-NeerjaNeural",
+ "Gender": "Female",
+ "Locale": "en-IN",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Neerja Online (Natural) - English (India)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (en-IN, PrabhatNeural)",
+ "ShortName": "en-IN-PrabhatNeural",
+ "Gender": "Male",
+ "Locale": "en-IN",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Prabhat Online (Natural) - English (India)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (en-IE, ConnorNeural)",
+ "ShortName": "en-IE-ConnorNeural",
+ "Gender": "Male",
+ "Locale": "en-IE",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Connor Online (Natural) - English (Ireland)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (en-IE, EmilyNeural)",
+ "ShortName": "en-IE-EmilyNeural",
+ "Gender": "Female",
+ "Locale": "en-IE",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Emily Online (Natural) - English (Ireland)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (en-KE, AsiliaNeural)",
+ "ShortName": "en-KE-AsiliaNeural",
+ "Gender": "Female",
+ "Locale": "en-KE",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Asilia Online (Natural) - English (Kenya)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (en-KE, ChilembaNeural)",
+ "ShortName": "en-KE-ChilembaNeural",
+ "Gender": "Male",
+ "Locale": "en-KE",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Chilemba Online (Natural) - English (Kenya)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (en-NZ, MitchellNeural)",
+ "ShortName": "en-NZ-MitchellNeural",
+ "Gender": "Male",
+ "Locale": "en-NZ",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Mitchell Online (Natural) - English (New Zealand)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (en-NZ, MollyNeural)",
+ "ShortName": "en-NZ-MollyNeural",
+ "Gender": "Female",
+ "Locale": "en-NZ",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Molly Online (Natural) - English (New Zealand)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (en-NG, AbeoNeural)",
+ "ShortName": "en-NG-AbeoNeural",
+ "Gender": "Male",
+ "Locale": "en-NG",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Abeo Online (Natural) - English (Nigeria)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (en-NG, EzinneNeural)",
+ "ShortName": "en-NG-EzinneNeural",
+ "Gender": "Female",
+ "Locale": "en-NG",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Ezinne Online (Natural) - English (Nigeria)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (en-PH, JamesNeural)",
+ "ShortName": "en-PH-JamesNeural",
+ "Gender": "Male",
+ "Locale": "en-PH",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft James Online (Natural) - English (Philippines)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (en-PH, RosaNeural)",
+ "ShortName": "en-PH-RosaNeural",
+ "Gender": "Female",
+ "Locale": "en-PH",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Rosa Online (Natural) - English (Philippines)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (en-SG, LunaNeural)",
+ "ShortName": "en-SG-LunaNeural",
+ "Gender": "Female",
+ "Locale": "en-SG",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Luna Online (Natural) - English (Singapore)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (en-SG, WayneNeural)",
+ "ShortName": "en-SG-WayneNeural",
+ "Gender": "Male",
+ "Locale": "en-SG",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Wayne Online (Natural) - English (Singapore)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (en-ZA, LeahNeural)",
+ "ShortName": "en-ZA-LeahNeural",
+ "Gender": "Female",
+ "Locale": "en-ZA",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Leah Online (Natural) - English (South Africa)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (en-ZA, LukeNeural)",
+ "ShortName": "en-ZA-LukeNeural",
+ "Gender": "Male",
+ "Locale": "en-ZA",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Luke Online (Natural) - English (South Africa)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (en-TZ, ElimuNeural)",
+ "ShortName": "en-TZ-ElimuNeural",
+ "Gender": "Male",
+ "Locale": "en-TZ",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Elimu Online (Natural) - English (Tanzania)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (en-TZ, ImaniNeural)",
+ "ShortName": "en-TZ-ImaniNeural",
+ "Gender": "Female",
+ "Locale": "en-TZ",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Imani Online (Natural) - English (Tanzania)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (en-GB, LibbyNeural)",
+ "ShortName": "en-GB-LibbyNeural",
+ "Gender": "Female",
+ "Locale": "en-GB",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Libby Online (Natural) - English (United Kingdom)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (en-GB, MaisieNeural)",
+ "ShortName": "en-GB-MaisieNeural",
+ "Gender": "Female",
+ "Locale": "en-GB",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Maisie Online (Natural) - English (United Kingdom)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (en-GB, RyanNeural)",
+ "ShortName": "en-GB-RyanNeural",
+ "Gender": "Male",
+ "Locale": "en-GB",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Ryan Online (Natural) - English (United Kingdom)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (en-GB, SoniaNeural)",
+ "ShortName": "en-GB-SoniaNeural",
+ "Gender": "Female",
+ "Locale": "en-GB",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Sonia Online (Natural) - English (United Kingdom)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (en-GB, ThomasNeural)",
+ "ShortName": "en-GB-ThomasNeural",
+ "Gender": "Male",
+ "Locale": "en-GB",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Thomas Online (Natural) - English (United Kingdom)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (en-US, AvaMultilingualNeural)",
+ "ShortName": "en-US-AvaMultilingualNeural",
+ "Gender": "Female",
+ "Locale": "en-US",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft AvaMultilingual Online (Natural) - English (United States)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "Conversation",
+ "Copilot"
+ ],
+ "VoicePersonalities": [
+ "Expressive",
+ "Caring",
+ "Pleasant",
+ "Friendly"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (en-US, AndrewMultilingualNeural)",
+ "ShortName": "en-US-AndrewMultilingualNeural",
+ "Gender": "Male",
+ "Locale": "en-US",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft AndrewMultilingual Online (Natural) - English (United States)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "Conversation",
+ "Copilot"
+ ],
+ "VoicePersonalities": [
+ "Warm",
+ "Confident",
+ "Authentic",
+ "Honest"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (en-US, EmmaMultilingualNeural)",
+ "ShortName": "en-US-EmmaMultilingualNeural",
+ "Gender": "Female",
+ "Locale": "en-US",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft EmmaMultilingual Online (Natural) - English (United States)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "Conversation",
+ "Copilot"
+ ],
+ "VoicePersonalities": [
+ "Cheerful",
+ "Clear",
+ "Conversational"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (en-US, BrianMultilingualNeural)",
+ "ShortName": "en-US-BrianMultilingualNeural",
+ "Gender": "Male",
+ "Locale": "en-US",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft BrianMultilingual Online (Natural) - English (United States)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "Conversation",
+ "Copilot"
+ ],
+ "VoicePersonalities": [
+ "Approachable",
+ "Casual",
+ "Sincere"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (en-US, AvaNeural)",
+ "ShortName": "en-US-AvaNeural",
+ "Gender": "Female",
+ "Locale": "en-US",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Ava Online (Natural) - English (United States)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "Conversation",
+ "Copilot"
+ ],
+ "VoicePersonalities": [
+ "Expressive",
+ "Caring",
+ "Pleasant",
+ "Friendly"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (en-US, AndrewNeural)",
+ "ShortName": "en-US-AndrewNeural",
+ "Gender": "Male",
+ "Locale": "en-US",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Andrew Online (Natural) - English (United States)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "Conversation",
+ "Copilot"
+ ],
+ "VoicePersonalities": [
+ "Warm",
+ "Confident",
+ "Authentic",
+ "Honest"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (en-US, EmmaNeural)",
+ "ShortName": "en-US-EmmaNeural",
+ "Gender": "Female",
+ "Locale": "en-US",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Emma Online (Natural) - English (United States)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "Conversation",
+ "Copilot"
+ ],
+ "VoicePersonalities": [
+ "Cheerful",
+ "Clear",
+ "Conversational"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (en-US, BrianNeural)",
+ "ShortName": "en-US-BrianNeural",
+ "Gender": "Male",
+ "Locale": "en-US",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Brian Online (Natural) - English (United States)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "Conversation",
+ "Copilot"
+ ],
+ "VoicePersonalities": [
+ "Approachable",
+ "Casual",
+ "Sincere"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (en-US, AnaNeural)",
+ "ShortName": "en-US-AnaNeural",
+ "Gender": "Female",
+ "Locale": "en-US",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Ana Online (Natural) - English (United States)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "Cartoon",
+ "Conversation"
+ ],
+ "VoicePersonalities": [
+ "Cute"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (en-US, AriaNeural)",
+ "ShortName": "en-US-AriaNeural",
+ "Gender": "Female",
+ "Locale": "en-US",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Aria Online (Natural) - English (United States)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "News",
+ "Novel"
+ ],
+ "VoicePersonalities": [
+ "Positive",
+ "Confident"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (en-US, ChristopherNeural)",
+ "ShortName": "en-US-ChristopherNeural",
+ "Gender": "Male",
+ "Locale": "en-US",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Christopher Online (Natural) - English (United States)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "News",
+ "Novel"
+ ],
+ "VoicePersonalities": [
+ "Reliable",
+ "Authority"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (en-US, EricNeural)",
+ "ShortName": "en-US-EricNeural",
+ "Gender": "Male",
+ "Locale": "en-US",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Eric Online (Natural) - English (United States)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "News",
+ "Novel"
+ ],
+ "VoicePersonalities": [
+ "Rational"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (en-US, GuyNeural)",
+ "ShortName": "en-US-GuyNeural",
+ "Gender": "Male",
+ "Locale": "en-US",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Guy Online (Natural) - English (United States)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "News",
+ "Novel"
+ ],
+ "VoicePersonalities": [
+ "Passion"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (en-US, JennyNeural)",
+ "ShortName": "en-US-JennyNeural",
+ "Gender": "Female",
+ "Locale": "en-US",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Jenny Online (Natural) - English (United States)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Considerate",
+ "Comfort"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (en-US, MichelleNeural)",
+ "ShortName": "en-US-MichelleNeural",
+ "Gender": "Female",
+ "Locale": "en-US",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Michelle Online (Natural) - English (United States)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "News",
+ "Novel"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Pleasant"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (en-US, RogerNeural)",
+ "ShortName": "en-US-RogerNeural",
+ "Gender": "Male",
+ "Locale": "en-US",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Roger Online (Natural) - English (United States)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "News",
+ "Novel"
+ ],
+ "VoicePersonalities": [
+ "Lively"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (en-US, SteffanNeural)",
+ "ShortName": "en-US-SteffanNeural",
+ "Gender": "Male",
+ "Locale": "en-US",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Steffan Online (Natural) - English (United States)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "News",
+ "Novel"
+ ],
+ "VoicePersonalities": [
+ "Rational"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (et-EE, AnuNeural)",
+ "ShortName": "et-EE-AnuNeural",
+ "Gender": "Female",
+ "Locale": "et-EE",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Anu Online (Natural) - Estonian (Estonia)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (et-EE, KertNeural)",
+ "ShortName": "et-EE-KertNeural",
+ "Gender": "Male",
+ "Locale": "et-EE",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Kert Online (Natural) - Estonian (Estonia)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (fil-PH, AngeloNeural)",
+ "ShortName": "fil-PH-AngeloNeural",
+ "Gender": "Male",
+ "Locale": "fil-PH",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Angelo Online (Natural) - Filipino (Philippines)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (fil-PH, BlessicaNeural)",
+ "ShortName": "fil-PH-BlessicaNeural",
+ "Gender": "Female",
+ "Locale": "fil-PH",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Blessica Online (Natural) - Filipino (Philippines)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (fi-FI, HarriNeural)",
+ "ShortName": "fi-FI-HarriNeural",
+ "Gender": "Male",
+ "Locale": "fi-FI",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Harri Online (Natural) - Finnish (Finland)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (fi-FI, NooraNeural)",
+ "ShortName": "fi-FI-NooraNeural",
+ "Gender": "Female",
+ "Locale": "fi-FI",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Noora Online (Natural) - Finnish (Finland)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (fr-BE, CharlineNeural)",
+ "ShortName": "fr-BE-CharlineNeural",
+ "Gender": "Female",
+ "Locale": "fr-BE",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Charline Online (Natural) - French (Belgium)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (fr-BE, GerardNeural)",
+ "ShortName": "fr-BE-GerardNeural",
+ "Gender": "Male",
+ "Locale": "fr-BE",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Gerard Online (Natural) - French (Belgium)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (fr-CA, ThierryNeural)",
+ "ShortName": "fr-CA-ThierryNeural",
+ "Gender": "Male",
+ "Locale": "fr-CA",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Thierry Online (Natural) - French (Canada)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (fr-CA, AntoineNeural)",
+ "ShortName": "fr-CA-AntoineNeural",
+ "Gender": "Male",
+ "Locale": "fr-CA",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Antoine Online (Natural) - French (Canada)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (fr-CA, JeanNeural)",
+ "ShortName": "fr-CA-JeanNeural",
+ "Gender": "Male",
+ "Locale": "fr-CA",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Jean Online (Natural) - French (Canada)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (fr-CA, SylvieNeural)",
+ "ShortName": "fr-CA-SylvieNeural",
+ "Gender": "Female",
+ "Locale": "fr-CA",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Sylvie Online (Natural) - French (Canada)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (fr-FR, VivienneMultilingualNeural)",
+ "ShortName": "fr-FR-VivienneMultilingualNeural",
+ "Gender": "Female",
+ "Locale": "fr-FR",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft VivienneMultilingual Online (Natural) - French (France)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (fr-FR, RemyMultilingualNeural)",
+ "ShortName": "fr-FR-RemyMultilingualNeural",
+ "Gender": "Male",
+ "Locale": "fr-FR",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft RemyMultilingual Online (Natural) - French (France)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (fr-FR, DeniseNeural)",
+ "ShortName": "fr-FR-DeniseNeural",
+ "Gender": "Female",
+ "Locale": "fr-FR",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Denise Online (Natural) - French (France)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (fr-FR, EloiseNeural)",
+ "ShortName": "fr-FR-EloiseNeural",
+ "Gender": "Female",
+ "Locale": "fr-FR",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Eloise Online (Natural) - French (France)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (fr-FR, HenriNeural)",
+ "ShortName": "fr-FR-HenriNeural",
+ "Gender": "Male",
+ "Locale": "fr-FR",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Henri Online (Natural) - French (France)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (fr-CH, ArianeNeural)",
+ "ShortName": "fr-CH-ArianeNeural",
+ "Gender": "Female",
+ "Locale": "fr-CH",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Ariane Online (Natural) - French (Switzerland)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (fr-CH, FabriceNeural)",
+ "ShortName": "fr-CH-FabriceNeural",
+ "Gender": "Male",
+ "Locale": "fr-CH",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Fabrice Online (Natural) - French (Switzerland)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (gl-ES, RoiNeural)",
+ "ShortName": "gl-ES-RoiNeural",
+ "Gender": "Male",
+ "Locale": "gl-ES",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Roi Online (Natural) - Galician (Spain)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (gl-ES, SabelaNeural)",
+ "ShortName": "gl-ES-SabelaNeural",
+ "Gender": "Female",
+ "Locale": "gl-ES",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Sabela Online (Natural) - Galician (Spain)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (ka-GE, EkaNeural)",
+ "ShortName": "ka-GE-EkaNeural",
+ "Gender": "Female",
+ "Locale": "ka-GE",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Eka Online (Natural) - Georgian (Georgia)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (ka-GE, GiorgiNeural)",
+ "ShortName": "ka-GE-GiorgiNeural",
+ "Gender": "Male",
+ "Locale": "ka-GE",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Giorgi Online (Natural) - Georgian (Georgia)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (de-AT, IngridNeural)",
+ "ShortName": "de-AT-IngridNeural",
+ "Gender": "Female",
+ "Locale": "de-AT",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Ingrid Online (Natural) - German (Austria)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (de-AT, JonasNeural)",
+ "ShortName": "de-AT-JonasNeural",
+ "Gender": "Male",
+ "Locale": "de-AT",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Jonas Online (Natural) - German (Austria)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (de-DE, SeraphinaMultilingualNeural)",
+ "ShortName": "de-DE-SeraphinaMultilingualNeural",
+ "Gender": "Female",
+ "Locale": "de-DE",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft SeraphinaMultilingual Online (Natural) - German (Germany)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (de-DE, FlorianMultilingualNeural)",
+ "ShortName": "de-DE-FlorianMultilingualNeural",
+ "Gender": "Male",
+ "Locale": "de-DE",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft FlorianMultilingual Online (Natural) - German (Germany)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (de-DE, AmalaNeural)",
+ "ShortName": "de-DE-AmalaNeural",
+ "Gender": "Female",
+ "Locale": "de-DE",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Amala Online (Natural) - German (Germany)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (de-DE, ConradNeural)",
+ "ShortName": "de-DE-ConradNeural",
+ "Gender": "Male",
+ "Locale": "de-DE",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Conrad Online (Natural) - German (Germany)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (de-DE, KatjaNeural)",
+ "ShortName": "de-DE-KatjaNeural",
+ "Gender": "Female",
+ "Locale": "de-DE",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Katja Online (Natural) - German (Germany)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (de-DE, KillianNeural)",
+ "ShortName": "de-DE-KillianNeural",
+ "Gender": "Male",
+ "Locale": "de-DE",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Killian Online (Natural) - German (Germany)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (de-CH, JanNeural)",
+ "ShortName": "de-CH-JanNeural",
+ "Gender": "Male",
+ "Locale": "de-CH",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Jan Online (Natural) - German (Switzerland)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (de-CH, LeniNeural)",
+ "ShortName": "de-CH-LeniNeural",
+ "Gender": "Female",
+ "Locale": "de-CH",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Leni Online (Natural) - German (Switzerland)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (el-GR, AthinaNeural)",
+ "ShortName": "el-GR-AthinaNeural",
+ "Gender": "Female",
+ "Locale": "el-GR",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Athina Online (Natural) - Greek (Greece)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (el-GR, NestorasNeural)",
+ "ShortName": "el-GR-NestorasNeural",
+ "Gender": "Male",
+ "Locale": "el-GR",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Nestoras Online (Natural) - Greek (Greece)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (gu-IN, DhwaniNeural)",
+ "ShortName": "gu-IN-DhwaniNeural",
+ "Gender": "Female",
+ "Locale": "gu-IN",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Dhwani Online (Natural) - Gujarati (India)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (gu-IN, NiranjanNeural)",
+ "ShortName": "gu-IN-NiranjanNeural",
+ "Gender": "Male",
+ "Locale": "gu-IN",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Niranjan Online (Natural) - Gujarati (India)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (he-IL, AvriNeural)",
+ "ShortName": "he-IL-AvriNeural",
+ "Gender": "Male",
+ "Locale": "he-IL",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Avri Online (Natural) - Hebrew (Israel)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (he-IL, HilaNeural)",
+ "ShortName": "he-IL-HilaNeural",
+ "Gender": "Female",
+ "Locale": "he-IL",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Hila Online (Natural) - Hebrew (Israel)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (hi-IN, MadhurNeural)",
+ "ShortName": "hi-IN-MadhurNeural",
+ "Gender": "Male",
+ "Locale": "hi-IN",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Madhur Online (Natural) - Hindi (India)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (hi-IN, SwaraNeural)",
+ "ShortName": "hi-IN-SwaraNeural",
+ "Gender": "Female",
+ "Locale": "hi-IN",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Swara Online (Natural) - Hindi (India)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (hu-HU, NoemiNeural)",
+ "ShortName": "hu-HU-NoemiNeural",
+ "Gender": "Female",
+ "Locale": "hu-HU",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Noemi Online (Natural) - Hungarian (Hungary)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (hu-HU, TamasNeural)",
+ "ShortName": "hu-HU-TamasNeural",
+ "Gender": "Male",
+ "Locale": "hu-HU",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Tamas Online (Natural) - Hungarian (Hungary)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (is-IS, GudrunNeural)",
+ "ShortName": "is-IS-GudrunNeural",
+ "Gender": "Female",
+ "Locale": "is-IS",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Gudrun Online (Natural) - Icelandic (Iceland)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (is-IS, GunnarNeural)",
+ "ShortName": "is-IS-GunnarNeural",
+ "Gender": "Male",
+ "Locale": "is-IS",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Gunnar Online (Natural) - Icelandic (Iceland)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (id-ID, ArdiNeural)",
+ "ShortName": "id-ID-ArdiNeural",
+ "Gender": "Male",
+ "Locale": "id-ID",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Ardi Online (Natural) - Indonesian (Indonesia)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (id-ID, GadisNeural)",
+ "ShortName": "id-ID-GadisNeural",
+ "Gender": "Female",
+ "Locale": "id-ID",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Gadis Online (Natural) - Indonesian (Indonesia)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (ga-IE, ColmNeural)",
+ "ShortName": "ga-IE-ColmNeural",
+ "Gender": "Male",
+ "Locale": "ga-IE",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Colm Online (Natural) - Irish (Ireland)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (ga-IE, OrlaNeural)",
+ "ShortName": "ga-IE-OrlaNeural",
+ "Gender": "Female",
+ "Locale": "ga-IE",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Orla Online (Natural) - Irish (Ireland)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (it-IT, GiuseppeNeural)",
+ "ShortName": "it-IT-GiuseppeNeural",
+ "Gender": "Male",
+ "Locale": "it-IT",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Giuseppe Online (Natural) - Italian (Italy)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (it-IT, DiegoNeural)",
+ "ShortName": "it-IT-DiegoNeural",
+ "Gender": "Male",
+ "Locale": "it-IT",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Diego Online (Natural) - Italian (Italy)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (it-IT, ElsaNeural)",
+ "ShortName": "it-IT-ElsaNeural",
+ "Gender": "Female",
+ "Locale": "it-IT",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Elsa Online (Natural) - Italian (Italy)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (it-IT, IsabellaNeural)",
+ "ShortName": "it-IT-IsabellaNeural",
+ "Gender": "Female",
+ "Locale": "it-IT",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Isabella Online (Natural) - Italian (Italy)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (ja-JP, KeitaNeural)",
+ "ShortName": "ja-JP-KeitaNeural",
+ "Gender": "Male",
+ "Locale": "ja-JP",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Keita Online (Natural) - Japanese (Japan)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (ja-JP, NanamiNeural)",
+ "ShortName": "ja-JP-NanamiNeural",
+ "Gender": "Female",
+ "Locale": "ja-JP",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Nanami Online (Natural) - Japanese (Japan)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (jv-ID, DimasNeural)",
+ "ShortName": "jv-ID-DimasNeural",
+ "Gender": "Male",
+ "Locale": "jv-ID",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Dimas Online (Natural) - Javanese (Indonesia)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (jv-ID, SitiNeural)",
+ "ShortName": "jv-ID-SitiNeural",
+ "Gender": "Female",
+ "Locale": "jv-ID",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Siti Online (Natural) - Javanese (Indonesia)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (kn-IN, GaganNeural)",
+ "ShortName": "kn-IN-GaganNeural",
+ "Gender": "Male",
+ "Locale": "kn-IN",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Gagan Online (Natural) - Kannada (India)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (kn-IN, SapnaNeural)",
+ "ShortName": "kn-IN-SapnaNeural",
+ "Gender": "Female",
+ "Locale": "kn-IN",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Sapna Online (Natural) - Kannada (India)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (kk-KZ, AigulNeural)",
+ "ShortName": "kk-KZ-AigulNeural",
+ "Gender": "Female",
+ "Locale": "kk-KZ",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Aigul Online (Natural) - Kazakh (Kazakhstan)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (kk-KZ, DauletNeural)",
+ "ShortName": "kk-KZ-DauletNeural",
+ "Gender": "Male",
+ "Locale": "kk-KZ",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Daulet Online (Natural) - Kazakh (Kazakhstan)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (km-KH, PisethNeural)",
+ "ShortName": "km-KH-PisethNeural",
+ "Gender": "Male",
+ "Locale": "km-KH",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Piseth Online (Natural) - Khmer (Cambodia)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (km-KH, SreymomNeural)",
+ "ShortName": "km-KH-SreymomNeural",
+ "Gender": "Female",
+ "Locale": "km-KH",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Sreymom Online (Natural) - Khmer (Cambodia)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (ko-KR, HyunsuNeural)",
+ "ShortName": "ko-KR-HyunsuNeural",
+ "Gender": "Male",
+ "Locale": "ko-KR",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Hyunsu Online (Natural) - Korean (Korea)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (ko-KR, InJoonNeural)",
+ "ShortName": "ko-KR-InJoonNeural",
+ "Gender": "Male",
+ "Locale": "ko-KR",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft InJoon Online (Natural) - Korean (Korea)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (ko-KR, SunHiNeural)",
+ "ShortName": "ko-KR-SunHiNeural",
+ "Gender": "Female",
+ "Locale": "ko-KR",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft SunHi Online (Natural) - Korean (Korea)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (lo-LA, ChanthavongNeural)",
+ "ShortName": "lo-LA-ChanthavongNeural",
+ "Gender": "Male",
+ "Locale": "lo-LA",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Chanthavong Online (Natural) - Lao (Laos)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (lo-LA, KeomanyNeural)",
+ "ShortName": "lo-LA-KeomanyNeural",
+ "Gender": "Female",
+ "Locale": "lo-LA",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Keomany Online (Natural) - Lao (Laos)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (lv-LV, EveritaNeural)",
+ "ShortName": "lv-LV-EveritaNeural",
+ "Gender": "Female",
+ "Locale": "lv-LV",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Everita Online (Natural) - Latvian (Latvia)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (lv-LV, NilsNeural)",
+ "ShortName": "lv-LV-NilsNeural",
+ "Gender": "Male",
+ "Locale": "lv-LV",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Nils Online (Natural) - Latvian (Latvia)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (lt-LT, LeonasNeural)",
+ "ShortName": "lt-LT-LeonasNeural",
+ "Gender": "Male",
+ "Locale": "lt-LT",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Leonas Online (Natural) - Lithuanian (Lithuania)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (lt-LT, OnaNeural)",
+ "ShortName": "lt-LT-OnaNeural",
+ "Gender": "Female",
+ "Locale": "lt-LT",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Ona Online (Natural) - Lithuanian (Lithuania)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (mk-MK, AleksandarNeural)",
+ "ShortName": "mk-MK-AleksandarNeural",
+ "Gender": "Male",
+ "Locale": "mk-MK",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Aleksandar Online (Natural) - Macedonian (Republic of North Macedonia)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (mk-MK, MarijaNeural)",
+ "ShortName": "mk-MK-MarijaNeural",
+ "Gender": "Female",
+ "Locale": "mk-MK",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Marija Online (Natural) - Macedonian (Republic of North Macedonia)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (ms-MY, OsmanNeural)",
+ "ShortName": "ms-MY-OsmanNeural",
+ "Gender": "Male",
+ "Locale": "ms-MY",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Osman Online (Natural) - Malay (Malaysia)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (ms-MY, YasminNeural)",
+ "ShortName": "ms-MY-YasminNeural",
+ "Gender": "Female",
+ "Locale": "ms-MY",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Yasmin Online (Natural) - Malay (Malaysia)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (ml-IN, MidhunNeural)",
+ "ShortName": "ml-IN-MidhunNeural",
+ "Gender": "Male",
+ "Locale": "ml-IN",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Midhun Online (Natural) - Malayalam (India)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (ml-IN, SobhanaNeural)",
+ "ShortName": "ml-IN-SobhanaNeural",
+ "Gender": "Female",
+ "Locale": "ml-IN",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Sobhana Online (Natural) - Malayalam (India)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (mt-MT, GraceNeural)",
+ "ShortName": "mt-MT-GraceNeural",
+ "Gender": "Female",
+ "Locale": "mt-MT",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Grace Online (Natural) - Maltese (Malta)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (mt-MT, JosephNeural)",
+ "ShortName": "mt-MT-JosephNeural",
+ "Gender": "Male",
+ "Locale": "mt-MT",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Joseph Online (Natural) - Maltese (Malta)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (mr-IN, AarohiNeural)",
+ "ShortName": "mr-IN-AarohiNeural",
+ "Gender": "Female",
+ "Locale": "mr-IN",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Aarohi Online (Natural) - Marathi (India)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (mr-IN, ManoharNeural)",
+ "ShortName": "mr-IN-ManoharNeural",
+ "Gender": "Male",
+ "Locale": "mr-IN",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Manohar Online (Natural) - Marathi (India)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (mn-MN, BataaNeural)",
+ "ShortName": "mn-MN-BataaNeural",
+ "Gender": "Male",
+ "Locale": "mn-MN",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Bataa Online (Natural) - Mongolian (Mongolia)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (mn-MN, YesuiNeural)",
+ "ShortName": "mn-MN-YesuiNeural",
+ "Gender": "Female",
+ "Locale": "mn-MN",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Yesui Online (Natural) - Mongolian (Mongolia)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (ne-NP, HemkalaNeural)",
+ "ShortName": "ne-NP-HemkalaNeural",
+ "Gender": "Female",
+ "Locale": "ne-NP",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Hemkala Online (Natural) - Nepali (Nepal)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (ne-NP, SagarNeural)",
+ "ShortName": "ne-NP-SagarNeural",
+ "Gender": "Male",
+ "Locale": "ne-NP",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Sagar Online (Natural) - Nepali (Nepal)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (nb-NO, FinnNeural)",
+ "ShortName": "nb-NO-FinnNeural",
+ "Gender": "Male",
+ "Locale": "nb-NO",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Finn Online (Natural) - Norwegian (Bokmål Norway)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (nb-NO, PernilleNeural)",
+ "ShortName": "nb-NO-PernilleNeural",
+ "Gender": "Female",
+ "Locale": "nb-NO",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Pernille Online (Natural) - Norwegian (Bokmål, Norway)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (ps-AF, GulNawazNeural)",
+ "ShortName": "ps-AF-GulNawazNeural",
+ "Gender": "Male",
+ "Locale": "ps-AF",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft GulNawaz Online (Natural) - Pashto (Afghanistan)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (ps-AF, LatifaNeural)",
+ "ShortName": "ps-AF-LatifaNeural",
+ "Gender": "Female",
+ "Locale": "ps-AF",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Latifa Online (Natural) - Pashto (Afghanistan)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (fa-IR, DilaraNeural)",
+ "ShortName": "fa-IR-DilaraNeural",
+ "Gender": "Female",
+ "Locale": "fa-IR",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Dilara Online (Natural) - Persian (Iran)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (fa-IR, FaridNeural)",
+ "ShortName": "fa-IR-FaridNeural",
+ "Gender": "Male",
+ "Locale": "fa-IR",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Farid Online (Natural) - Persian (Iran)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (pl-PL, MarekNeural)",
+ "ShortName": "pl-PL-MarekNeural",
+ "Gender": "Male",
+ "Locale": "pl-PL",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Marek Online (Natural) - Polish (Poland)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (pl-PL, ZofiaNeural)",
+ "ShortName": "pl-PL-ZofiaNeural",
+ "Gender": "Female",
+ "Locale": "pl-PL",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Zofia Online (Natural) - Polish (Poland)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (pt-BR, ThalitaNeural)",
+ "ShortName": "pt-BR-ThalitaNeural",
+ "Gender": "Female",
+ "Locale": "pt-BR",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Thalita Online (Natural) - Portuguese (Brazil)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (pt-BR, AntonioNeural)",
+ "ShortName": "pt-BR-AntonioNeural",
+ "Gender": "Male",
+ "Locale": "pt-BR",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Antonio Online (Natural) - Portuguese (Brazil)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (pt-BR, FranciscaNeural)",
+ "ShortName": "pt-BR-FranciscaNeural",
+ "Gender": "Female",
+ "Locale": "pt-BR",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Francisca Online (Natural) - Portuguese (Brazil)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (pt-PT, DuarteNeural)",
+ "ShortName": "pt-PT-DuarteNeural",
+ "Gender": "Male",
+ "Locale": "pt-PT",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Duarte Online (Natural) - Portuguese (Portugal)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (pt-PT, RaquelNeural)",
+ "ShortName": "pt-PT-RaquelNeural",
+ "Gender": "Female",
+ "Locale": "pt-PT",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Raquel Online (Natural) - Portuguese (Portugal)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (ro-RO, AlinaNeural)",
+ "ShortName": "ro-RO-AlinaNeural",
+ "Gender": "Female",
+ "Locale": "ro-RO",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Alina Online (Natural) - Romanian (Romania)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (ro-RO, EmilNeural)",
+ "ShortName": "ro-RO-EmilNeural",
+ "Gender": "Male",
+ "Locale": "ro-RO",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Emil Online (Natural) - Romanian (Romania)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (ru-RU, DmitryNeural)",
+ "ShortName": "ru-RU-DmitryNeural",
+ "Gender": "Male",
+ "Locale": "ru-RU",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Dmitry Online (Natural) - Russian (Russia)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (ru-RU, SvetlanaNeural)",
+ "ShortName": "ru-RU-SvetlanaNeural",
+ "Gender": "Female",
+ "Locale": "ru-RU",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Svetlana Online (Natural) - Russian (Russia)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (sr-RS, NicholasNeural)",
+ "ShortName": "sr-RS-NicholasNeural",
+ "Gender": "Male",
+ "Locale": "sr-RS",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Nicholas Online (Natural) - Serbian (Serbia)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (sr-RS, SophieNeural)",
+ "ShortName": "sr-RS-SophieNeural",
+ "Gender": "Female",
+ "Locale": "sr-RS",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Sophie Online (Natural) - Serbian (Serbia)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (si-LK, SameeraNeural)",
+ "ShortName": "si-LK-SameeraNeural",
+ "Gender": "Male",
+ "Locale": "si-LK",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Sameera Online (Natural) - Sinhala (Sri Lanka)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (si-LK, ThiliniNeural)",
+ "ShortName": "si-LK-ThiliniNeural",
+ "Gender": "Female",
+ "Locale": "si-LK",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Thilini Online (Natural) - Sinhala (Sri Lanka)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (sk-SK, LukasNeural)",
+ "ShortName": "sk-SK-LukasNeural",
+ "Gender": "Male",
+ "Locale": "sk-SK",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Lukas Online (Natural) - Slovak (Slovakia)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (sk-SK, ViktoriaNeural)",
+ "ShortName": "sk-SK-ViktoriaNeural",
+ "Gender": "Female",
+ "Locale": "sk-SK",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Viktoria Online (Natural) - Slovak (Slovakia)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (sl-SI, PetraNeural)",
+ "ShortName": "sl-SI-PetraNeural",
+ "Gender": "Female",
+ "Locale": "sl-SI",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Petra Online (Natural) - Slovenian (Slovenia)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (sl-SI, RokNeural)",
+ "ShortName": "sl-SI-RokNeural",
+ "Gender": "Male",
+ "Locale": "sl-SI",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Rok Online (Natural) - Slovenian (Slovenia)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (so-SO, MuuseNeural)",
+ "ShortName": "so-SO-MuuseNeural",
+ "Gender": "Male",
+ "Locale": "so-SO",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Muuse Online (Natural) - Somali (Somalia)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (so-SO, UbaxNeural)",
+ "ShortName": "so-SO-UbaxNeural",
+ "Gender": "Female",
+ "Locale": "so-SO",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Ubax Online (Natural) - Somali (Somalia)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (es-AR, ElenaNeural)",
+ "ShortName": "es-AR-ElenaNeural",
+ "Gender": "Female",
+ "Locale": "es-AR",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Elena Online (Natural) - Spanish (Argentina)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (es-AR, TomasNeural)",
+ "ShortName": "es-AR-TomasNeural",
+ "Gender": "Male",
+ "Locale": "es-AR",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Tomas Online (Natural) - Spanish (Argentina)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (es-BO, MarceloNeural)",
+ "ShortName": "es-BO-MarceloNeural",
+ "Gender": "Male",
+ "Locale": "es-BO",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Marcelo Online (Natural) - Spanish (Bolivia)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (es-BO, SofiaNeural)",
+ "ShortName": "es-BO-SofiaNeural",
+ "Gender": "Female",
+ "Locale": "es-BO",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Sofia Online (Natural) - Spanish (Bolivia)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (es-CL, CatalinaNeural)",
+ "ShortName": "es-CL-CatalinaNeural",
+ "Gender": "Female",
+ "Locale": "es-CL",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Catalina Online (Natural) - Spanish (Chile)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (es-CL, LorenzoNeural)",
+ "ShortName": "es-CL-LorenzoNeural",
+ "Gender": "Male",
+ "Locale": "es-CL",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Lorenzo Online (Natural) - Spanish (Chile)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (es-ES, XimenaNeural)",
+ "ShortName": "es-ES-XimenaNeural",
+ "Gender": "Female",
+ "Locale": "es-ES",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Ximena Online (Natural) - Spanish (Colombia)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (es-CO, GonzaloNeural)",
+ "ShortName": "es-CO-GonzaloNeural",
+ "Gender": "Male",
+ "Locale": "es-CO",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Gonzalo Online (Natural) - Spanish (Colombia)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (es-CO, SalomeNeural)",
+ "ShortName": "es-CO-SalomeNeural",
+ "Gender": "Female",
+ "Locale": "es-CO",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Salome Online (Natural) - Spanish (Colombia)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (es-CR, JuanNeural)",
+ "ShortName": "es-CR-JuanNeural",
+ "Gender": "Male",
+ "Locale": "es-CR",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Juan Online (Natural) - Spanish (Costa Rica)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (es-CR, MariaNeural)",
+ "ShortName": "es-CR-MariaNeural",
+ "Gender": "Female",
+ "Locale": "es-CR",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Maria Online (Natural) - Spanish (Costa Rica)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (es-CU, BelkysNeural)",
+ "ShortName": "es-CU-BelkysNeural",
+ "Gender": "Female",
+ "Locale": "es-CU",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Belkys Online (Natural) - Spanish (Cuba)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (es-CU, ManuelNeural)",
+ "ShortName": "es-CU-ManuelNeural",
+ "Gender": "Male",
+ "Locale": "es-CU",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Manuel Online (Natural) - Spanish (Cuba)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (es-DO, EmilioNeural)",
+ "ShortName": "es-DO-EmilioNeural",
+ "Gender": "Male",
+ "Locale": "es-DO",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Emilio Online (Natural) - Spanish (Dominican Republic)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (es-DO, RamonaNeural)",
+ "ShortName": "es-DO-RamonaNeural",
+ "Gender": "Female",
+ "Locale": "es-DO",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Ramona Online (Natural) - Spanish (Dominican Republic)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (es-EC, AndreaNeural)",
+ "ShortName": "es-EC-AndreaNeural",
+ "Gender": "Female",
+ "Locale": "es-EC",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Andrea Online (Natural) - Spanish (Ecuador)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (es-EC, LuisNeural)",
+ "ShortName": "es-EC-LuisNeural",
+ "Gender": "Male",
+ "Locale": "es-EC",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Luis Online (Natural) - Spanish (Ecuador)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (es-SV, LorenaNeural)",
+ "ShortName": "es-SV-LorenaNeural",
+ "Gender": "Female",
+ "Locale": "es-SV",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Lorena Online (Natural) - Spanish (El Salvador)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (es-SV, RodrigoNeural)",
+ "ShortName": "es-SV-RodrigoNeural",
+ "Gender": "Male",
+ "Locale": "es-SV",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Rodrigo Online (Natural) - Spanish (El Salvador)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (es-GQ, JavierNeural)",
+ "ShortName": "es-GQ-JavierNeural",
+ "Gender": "Male",
+ "Locale": "es-GQ",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Javier Online (Natural) - Spanish (Equatorial Guinea)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (es-GQ, TeresaNeural)",
+ "ShortName": "es-GQ-TeresaNeural",
+ "Gender": "Female",
+ "Locale": "es-GQ",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Teresa Online (Natural) - Spanish (Equatorial Guinea)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (es-GT, AndresNeural)",
+ "ShortName": "es-GT-AndresNeural",
+ "Gender": "Male",
+ "Locale": "es-GT",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Andres Online (Natural) - Spanish (Guatemala)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (es-GT, MartaNeural)",
+ "ShortName": "es-GT-MartaNeural",
+ "Gender": "Female",
+ "Locale": "es-GT",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Marta Online (Natural) - Spanish (Guatemala)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (es-HN, CarlosNeural)",
+ "ShortName": "es-HN-CarlosNeural",
+ "Gender": "Male",
+ "Locale": "es-HN",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Carlos Online (Natural) - Spanish (Honduras)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (es-HN, KarlaNeural)",
+ "ShortName": "es-HN-KarlaNeural",
+ "Gender": "Female",
+ "Locale": "es-HN",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Karla Online (Natural) - Spanish (Honduras)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (es-MX, DaliaNeural)",
+ "ShortName": "es-MX-DaliaNeural",
+ "Gender": "Female",
+ "Locale": "es-MX",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Dalia Online (Natural) - Spanish (Mexico)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (es-MX, JorgeNeural)",
+ "ShortName": "es-MX-JorgeNeural",
+ "Gender": "Male",
+ "Locale": "es-MX",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Jorge Online (Natural) - Spanish (Mexico)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (es-NI, FedericoNeural)",
+ "ShortName": "es-NI-FedericoNeural",
+ "Gender": "Male",
+ "Locale": "es-NI",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Federico Online (Natural) - Spanish (Nicaragua)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (es-NI, YolandaNeural)",
+ "ShortName": "es-NI-YolandaNeural",
+ "Gender": "Female",
+ "Locale": "es-NI",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Yolanda Online (Natural) - Spanish (Nicaragua)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (es-PA, MargaritaNeural)",
+ "ShortName": "es-PA-MargaritaNeural",
+ "Gender": "Female",
+ "Locale": "es-PA",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Margarita Online (Natural) - Spanish (Panama)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (es-PA, RobertoNeural)",
+ "ShortName": "es-PA-RobertoNeural",
+ "Gender": "Male",
+ "Locale": "es-PA",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Roberto Online (Natural) - Spanish (Panama)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (es-PY, MarioNeural)",
+ "ShortName": "es-PY-MarioNeural",
+ "Gender": "Male",
+ "Locale": "es-PY",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Mario Online (Natural) - Spanish (Paraguay)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (es-PY, TaniaNeural)",
+ "ShortName": "es-PY-TaniaNeural",
+ "Gender": "Female",
+ "Locale": "es-PY",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Tania Online (Natural) - Spanish (Paraguay)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (es-PE, AlexNeural)",
+ "ShortName": "es-PE-AlexNeural",
+ "Gender": "Male",
+ "Locale": "es-PE",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Alex Online (Natural) - Spanish (Peru)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (es-PE, CamilaNeural)",
+ "ShortName": "es-PE-CamilaNeural",
+ "Gender": "Female",
+ "Locale": "es-PE",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Camila Online (Natural) - Spanish (Peru)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (es-PR, KarinaNeural)",
+ "ShortName": "es-PR-KarinaNeural",
+ "Gender": "Female",
+ "Locale": "es-PR",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Karina Online (Natural) - Spanish (Puerto Rico)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (es-PR, VictorNeural)",
+ "ShortName": "es-PR-VictorNeural",
+ "Gender": "Male",
+ "Locale": "es-PR",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Victor Online (Natural) - Spanish (Puerto Rico)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (es-ES, AlvaroNeural)",
+ "ShortName": "es-ES-AlvaroNeural",
+ "Gender": "Male",
+ "Locale": "es-ES",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Alvaro Online (Natural) - Spanish (Spain)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (es-ES, ElviraNeural)",
+ "ShortName": "es-ES-ElviraNeural",
+ "Gender": "Female",
+ "Locale": "es-ES",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Elvira Online (Natural) - Spanish (Spain)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (es-US, AlonsoNeural)",
+ "ShortName": "es-US-AlonsoNeural",
+ "Gender": "Male",
+ "Locale": "es-US",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Alonso Online (Natural) - Spanish (United States)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (es-US, PalomaNeural)",
+ "ShortName": "es-US-PalomaNeural",
+ "Gender": "Female",
+ "Locale": "es-US",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Paloma Online (Natural) - Spanish (United States)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (es-UY, MateoNeural)",
+ "ShortName": "es-UY-MateoNeural",
+ "Gender": "Male",
+ "Locale": "es-UY",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Mateo Online (Natural) - Spanish (Uruguay)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (es-UY, ValentinaNeural)",
+ "ShortName": "es-UY-ValentinaNeural",
+ "Gender": "Female",
+ "Locale": "es-UY",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Valentina Online (Natural) - Spanish (Uruguay)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (es-VE, PaolaNeural)",
+ "ShortName": "es-VE-PaolaNeural",
+ "Gender": "Female",
+ "Locale": "es-VE",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Paola Online (Natural) - Spanish (Venezuela)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (es-VE, SebastianNeural)",
+ "ShortName": "es-VE-SebastianNeural",
+ "Gender": "Male",
+ "Locale": "es-VE",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Sebastian Online (Natural) - Spanish (Venezuela)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (su-ID, JajangNeural)",
+ "ShortName": "su-ID-JajangNeural",
+ "Gender": "Male",
+ "Locale": "su-ID",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Jajang Online (Natural) - Sundanese (Indonesia)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (su-ID, TutiNeural)",
+ "ShortName": "su-ID-TutiNeural",
+ "Gender": "Female",
+ "Locale": "su-ID",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Tuti Online (Natural) - Sundanese (Indonesia)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (sw-KE, RafikiNeural)",
+ "ShortName": "sw-KE-RafikiNeural",
+ "Gender": "Male",
+ "Locale": "sw-KE",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Rafiki Online (Natural) - Swahili (Kenya)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (sw-KE, ZuriNeural)",
+ "ShortName": "sw-KE-ZuriNeural",
+ "Gender": "Female",
+ "Locale": "sw-KE",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Zuri Online (Natural) - Swahili (Kenya)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (sw-TZ, DaudiNeural)",
+ "ShortName": "sw-TZ-DaudiNeural",
+ "Gender": "Male",
+ "Locale": "sw-TZ",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Daudi Online (Natural) - Swahili (Tanzania)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (sw-TZ, RehemaNeural)",
+ "ShortName": "sw-TZ-RehemaNeural",
+ "Gender": "Female",
+ "Locale": "sw-TZ",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Rehema Online (Natural) - Swahili (Tanzania)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (sv-SE, MattiasNeural)",
+ "ShortName": "sv-SE-MattiasNeural",
+ "Gender": "Male",
+ "Locale": "sv-SE",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Mattias Online (Natural) - Swedish (Sweden)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (sv-SE, SofieNeural)",
+ "ShortName": "sv-SE-SofieNeural",
+ "Gender": "Female",
+ "Locale": "sv-SE",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Sofie Online (Natural) - Swedish (Sweden)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (ta-IN, PallaviNeural)",
+ "ShortName": "ta-IN-PallaviNeural",
+ "Gender": "Female",
+ "Locale": "ta-IN",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Pallavi Online (Natural) - Tamil (India)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (ta-IN, ValluvarNeural)",
+ "ShortName": "ta-IN-ValluvarNeural",
+ "Gender": "Male",
+ "Locale": "ta-IN",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Valluvar Online (Natural) - Tamil (India)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (ta-MY, KaniNeural)",
+ "ShortName": "ta-MY-KaniNeural",
+ "Gender": "Female",
+ "Locale": "ta-MY",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Kani Online (Natural) - Tamil (Malaysia)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (ta-MY, SuryaNeural)",
+ "ShortName": "ta-MY-SuryaNeural",
+ "Gender": "Male",
+ "Locale": "ta-MY",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Surya Online (Natural) - Tamil (Malaysia)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (ta-SG, AnbuNeural)",
+ "ShortName": "ta-SG-AnbuNeural",
+ "Gender": "Male",
+ "Locale": "ta-SG",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Anbu Online (Natural) - Tamil (Singapore)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (ta-SG, VenbaNeural)",
+ "ShortName": "ta-SG-VenbaNeural",
+ "Gender": "Female",
+ "Locale": "ta-SG",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Venba Online (Natural) - Tamil (Singapore)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (ta-LK, KumarNeural)",
+ "ShortName": "ta-LK-KumarNeural",
+ "Gender": "Male",
+ "Locale": "ta-LK",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Kumar Online (Natural) - Tamil (Sri Lanka)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (ta-LK, SaranyaNeural)",
+ "ShortName": "ta-LK-SaranyaNeural",
+ "Gender": "Female",
+ "Locale": "ta-LK",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Saranya Online (Natural) - Tamil (Sri Lanka)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (te-IN, MohanNeural)",
+ "ShortName": "te-IN-MohanNeural",
+ "Gender": "Male",
+ "Locale": "te-IN",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Mohan Online (Natural) - Telugu (India)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (te-IN, ShrutiNeural)",
+ "ShortName": "te-IN-ShrutiNeural",
+ "Gender": "Female",
+ "Locale": "te-IN",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Shruti Online (Natural) - Telugu (India)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (th-TH, NiwatNeural)",
+ "ShortName": "th-TH-NiwatNeural",
+ "Gender": "Male",
+ "Locale": "th-TH",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Niwat Online (Natural) - Thai (Thailand)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (th-TH, PremwadeeNeural)",
+ "ShortName": "th-TH-PremwadeeNeural",
+ "Gender": "Female",
+ "Locale": "th-TH",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Premwadee Online (Natural) - Thai (Thailand)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (tr-TR, AhmetNeural)",
+ "ShortName": "tr-TR-AhmetNeural",
+ "Gender": "Male",
+ "Locale": "tr-TR",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Ahmet Online (Natural) - Turkish (Turkey)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (tr-TR, EmelNeural)",
+ "ShortName": "tr-TR-EmelNeural",
+ "Gender": "Female",
+ "Locale": "tr-TR",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Emel Online (Natural) - Turkish (Turkey)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (uk-UA, OstapNeural)",
+ "ShortName": "uk-UA-OstapNeural",
+ "Gender": "Male",
+ "Locale": "uk-UA",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Ostap Online (Natural) - Ukrainian (Ukraine)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (uk-UA, PolinaNeural)",
+ "ShortName": "uk-UA-PolinaNeural",
+ "Gender": "Female",
+ "Locale": "uk-UA",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Polina Online (Natural) - Ukrainian (Ukraine)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (ur-IN, GulNeural)",
+ "ShortName": "ur-IN-GulNeural",
+ "Gender": "Female",
+ "Locale": "ur-IN",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Gul Online (Natural) - Urdu (India)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (ur-IN, SalmanNeural)",
+ "ShortName": "ur-IN-SalmanNeural",
+ "Gender": "Male",
+ "Locale": "ur-IN",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Salman Online (Natural) - Urdu (India)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (ur-PK, AsadNeural)",
+ "ShortName": "ur-PK-AsadNeural",
+ "Gender": "Male",
+ "Locale": "ur-PK",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Asad Online (Natural) - Urdu (Pakistan)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (ur-PK, UzmaNeural)",
+ "ShortName": "ur-PK-UzmaNeural",
+ "Gender": "Female",
+ "Locale": "ur-PK",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Uzma Online (Natural) - Urdu (Pakistan)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (uz-UZ, MadinaNeural)",
+ "ShortName": "uz-UZ-MadinaNeural",
+ "Gender": "Female",
+ "Locale": "uz-UZ",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Madina Online (Natural) - Uzbek (Uzbekistan)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (uz-UZ, SardorNeural)",
+ "ShortName": "uz-UZ-SardorNeural",
+ "Gender": "Male",
+ "Locale": "uz-UZ",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Sardor Online (Natural) - Uzbek (Uzbekistan)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (vi-VN, HoaiMyNeural)",
+ "ShortName": "vi-VN-HoaiMyNeural",
+ "Gender": "Female",
+ "Locale": "vi-VN",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft HoaiMy Online (Natural) - Vietnamese (Vietnam)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (vi-VN, NamMinhNeural)",
+ "ShortName": "vi-VN-NamMinhNeural",
+ "Gender": "Male",
+ "Locale": "vi-VN",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft NamMinh Online (Natural) - Vietnamese (Vietnam)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (cy-GB, AledNeural)",
+ "ShortName": "cy-GB-AledNeural",
+ "Gender": "Male",
+ "Locale": "cy-GB",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Aled Online (Natural) - Welsh (United Kingdom)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (cy-GB, NiaNeural)",
+ "ShortName": "cy-GB-NiaNeural",
+ "Gender": "Female",
+ "Locale": "cy-GB",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Nia Online (Natural) - Welsh (United Kingdom)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (zu-ZA, ThandoNeural)",
+ "ShortName": "zu-ZA-ThandoNeural",
+ "Gender": "Female",
+ "Locale": "zu-ZA",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Thando Online (Natural) - Zulu (South Africa)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ },
+ {
+ "Name": "Microsoft Server Speech Text to Speech Voice (zu-ZA, ThembaNeural)",
+ "ShortName": "zu-ZA-ThembaNeural",
+ "Gender": "Male",
+ "Locale": "zu-ZA",
+ "SuggestedCodec": "audio-24khz-48kbitrate-mono-mp3",
+ "FriendlyName": "Microsoft Themba Online (Natural) - Zulu (South Africa)",
+ "Status": "GA",
+ "VoiceTag": {
+ "ContentCategories": [
+ "General"
+ ],
+ "VoicePersonalities": [
+ "Friendly",
+ "Positive"
+ ]
+ }
+ }
+]
\ No newline at end of file
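
The manifest above enumerates the Edge TTS voices exposed by the TTS tab, one object per voice keyed by `ShortName`, `Gender`, and `Locale`. As an illustration of how such a manifest can be narrowed to a subset of voices, here is a minimal sketch; the filename `tts_voices.json` and the locale filter are illustrative, not part of this diff:

```python
# Hypothetical helper: filter the voice manifest by locale prefix and gender.
import json

with open("tts_voices.json", encoding="utf-8") as f:
    voices = json.load(f)

spanish_female = [
    v["ShortName"]
    for v in voices
    if v["Locale"].startswith("es-") and v["Gender"] == "Female"
]
print(spanish_female)  # e.g. ['es-SV-LorenaNeural', 'es-GT-MartaNeural', ...]
```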
diff --git a/rvc/lib/utils.py b/rvc/lib/utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..429a17218b59d95f233363d50b9306de4b8391ab
--- /dev/null
+++ b/rvc/lib/utils.py
@@ -0,0 +1,153 @@
+import os
+import sys
+import librosa
+import soundfile as sf
+import numpy as np
+import re
+import unicodedata
+import wget
+import subprocess
+from pydub import AudioSegment
+import tempfile
+from torch import nn
+
+import logging
+from transformers import HubertModel
+import warnings
+
+# Remove this to see warnings about transformers models
+warnings.filterwarnings("ignore")
+
+logging.getLogger("fairseq").setLevel(logging.ERROR)
+logging.getLogger("faiss.loader").setLevel(logging.ERROR)
+logging.getLogger("transformers").setLevel(logging.ERROR)
+logging.getLogger("torch").setLevel(logging.ERROR)
+
+now_dir = os.getcwd()
+sys.path.append(now_dir)
+
+base_path = os.path.join(now_dir, "rvc", "models", "formant", "stftpitchshift")
+stft = base_path + ".exe" if sys.platform == "win32" else base_path
+
+
+class HubertModelWithFinalProj(HubertModel):
+ def __init__(self, config):
+ super().__init__(config)
+ self.final_proj = nn.Linear(config.hidden_size, config.classifier_proj_size)
+
+
+def load_audio(file, sample_rate):
+ try:
+ file = file.strip(' "\n')
+ audio, sr = sf.read(file)
+ if len(audio.shape) > 1:
+ audio = librosa.to_mono(audio.T)
+ if sr != sample_rate:
+ audio = librosa.resample(audio, orig_sr=sr, target_sr=sample_rate)
+ except Exception as error:
+ raise RuntimeError(f"An error occurred loading the audio: {error}")
+
+ return audio.flatten()
+
+
+def load_audio_infer(
+ file, sample_rate, formant_shifting, formant_qfrency, formant_timbre
+):
+ try:
+ file = file.strip(' "\n')
+ if not os.path.isfile(file):
+ raise FileNotFoundError(f"File not found: {file}")
+ audio, sr = sf.read(file)
+ if len(audio.shape) > 1:
+ audio = librosa.to_mono(audio.T)
+ if sr != sample_rate:
+ audio = librosa.resample(audio, orig_sr=sr, target_sr=sample_rate)
+ if formant_shifting:
+ audio = (audio * 32767).astype(np.int16)
+ audio_segment = AudioSegment(
+ audio.tobytes(),
+ frame_rate=sample_rate,
+ sample_width=2,
+ channels=1,
+ )
+ with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_file:
+ temp_file_path = temp_file.name
+ audio_segment.export(temp_file_path, format="wav")
+
+ command = [
+ stft,
+ "-i",
+ temp_file_path,
+ "-q",
+ str(formant_qfrency),
+ "-t",
+ str(formant_timbre),
+ "-o",
+ f"{temp_file_path}_formatted.wav",
+ ]
+ # pass the argument list directly; shell=True with a list would run only the executable
+ subprocess.run(command)
+ formatted_audio_path = f"{temp_file_path}_formatted.wav"
+ audio, sr = sf.read(formatted_audio_path)
+ if len(audio.shape) > 1:
+ audio = librosa.to_mono(audio.T)
+ if sr != sample_rate:
+ audio = librosa.resample(audio, orig_sr=sr, target_sr=sample_rate)
+ # clean up the intermediate files written for stftpitchshift
+ os.remove(temp_file_path)
+ os.remove(formatted_audio_path)
+ except Exception as error:
+ raise RuntimeError(f"An error occurred loading the audio: {error}")
+ return audio.flatten()
+
+
+def format_title(title):
+ formatted_title = (
+ unicodedata.normalize("NFKD", title).encode("ascii", "ignore").decode("utf-8")
+ )
+ formatted_title = re.sub(r"[\u2500-\u257F]+", "", formatted_title)
+ formatted_title = re.sub(r"[^\w\s.-]", "", formatted_title)
+ formatted_title = re.sub(r"\s+", "_", formatted_title)
+ return formatted_title
+
+
+def load_embedding(embedder_model, custom_embedder=None):
+ embedder_root = os.path.join(now_dir, "rvc", "models", "embedders")
+ embedding_list = {
+ "contentvec": os.path.join(embedder_root, "contentvec"),
+ "chinese-hubert-base": os.path.join(embedder_root, "chinese_hubert_base"),
+ "japanese-hubert-base": os.path.join(embedder_root, "japanese_hubert_base"),
+ "korean-hubert-base": os.path.join(embedder_root, "korean_hubert_base"),
+ }
+
+ online_embedders = {
+ "contentvec": "https://huggingface.co/IAHispano/Applio/resolve/main/Resources/embedders/contentvec/pytorch_model.bin",
+ "chinese-hubert-base": "https://huggingface.co/IAHispano/Applio/resolve/main/Resources/embedders/chinese_hubert_base/pytorch_model.bin",
+ "japanese-hubert-base": "https://huggingface.co/IAHispano/Applio/resolve/main/Resources/embedders/japanese_hubert_base/pytorch_model.bin",
+ "korean-hubert-base": "https://huggingface.co/IAHispano/Applio/resolve/main/Resources/embedders/korean_hubert_base/pytorch_model.bin",
+ }
+
+ config_files = {
+ "contentvec": "https://huggingface.co/IAHispano/Applio/resolve/main/Resources/embedders/contentvec/config.json",
+ "chinese-hubert-base": "https://huggingface.co/IAHispano/Applio/resolve/main/Resources/embedders/chinese_hubert_base/config.json",
+ "japanese-hubert-base": "https://huggingface.co/IAHispano/Applio/resolve/main/Resources/embedders/japanese_hubert_base/config.json",
+ "korean-hubert-base": "https://huggingface.co/IAHispano/Applio/resolve/main/Resources/embedders/korean_hubert_base/config.json",
+ }
+
+ if embedder_model == "custom":
+ if custom_embedder and os.path.exists(custom_embedder):
+ model_path = custom_embedder
+ else:
+ print(f"Custom embedder not found: {custom_embedder}, using contentvec")
+ # fall back completely so the download URLs below resolve if files are missing
+ embedder_model = "contentvec"
+ model_path = embedding_list["contentvec"]
+ else:
+ model_path = embedding_list[embedder_model]
+ bin_file = os.path.join(model_path, "pytorch_model.bin")
+ json_file = os.path.join(model_path, "config.json")
+ os.makedirs(model_path, exist_ok=True)
+ if not os.path.exists(bin_file):
+ url = online_embedders[embedder_model]
+ print(f"Downloading {url} to {model_path}...")
+ wget.download(url, out=bin_file)
+ if not os.path.exists(json_file):
+ url = config_files[embedder_model]
+ print(f"Downloading {url} to {model_path}...")
+ wget.download(url, out=json_file)
+
+ models = HubertModelWithFinalProj.from_pretrained(model_path)
+ return models
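
To make the new helpers concrete, a minimal usage sketch follows. It assumes it is run from the repository root (since `load_embedding` resolves model folders against `os.getcwd()`); `example.wav` is a placeholder path:

```python
# Sketch of the rvc/lib/utils.py helpers added above.
from rvc.lib.utils import format_title, load_audio, load_embedding

# Decode any supported audio file to a mono float array at the requested rate.
audio = load_audio("example.wav", 16000)
print(audio.shape)

# Normalize a title to a filesystem-safe ASCII identifier.
print(format_title("Canción de prueba #1"))  # -> "Cancion_de_prueba_1"

# Downloads the contentvec weights on first use, then returns the wrapped model.
model = load_embedding("contentvec")
```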
diff --git a/rvc/models/embedders/.gitkeep b/rvc/models/embedders/.gitkeep
new file mode 100644
index 0000000000000000000000000000000000000000..8b137891791fe96927ad78e64b0aad7bded08bdc
--- /dev/null
+++ b/rvc/models/embedders/.gitkeep
@@ -0,0 +1 @@
+
diff --git a/rvc/models/embedders/embedders_custom/.gitkeep b/rvc/models/embedders/embedders_custom/.gitkeep
new file mode 100644
index 0000000000000000000000000000000000000000..8b137891791fe96927ad78e64b0aad7bded08bdc
--- /dev/null
+++ b/rvc/models/embedders/embedders_custom/.gitkeep
@@ -0,0 +1 @@
+
diff --git a/rvc/models/formant/.gitkeep b/rvc/models/formant/.gitkeep
new file mode 100644
index 0000000000000000000000000000000000000000..8b137891791fe96927ad78e64b0aad7bded08bdc
--- /dev/null
+++ b/rvc/models/formant/.gitkeep
@@ -0,0 +1 @@
+
diff --git a/rvc/models/predictors/.gitkeep b/rvc/models/predictors/.gitkeep
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/rvc/models/pretraineds/.gitkeep b/rvc/models/pretraineds/.gitkeep
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/rvc/models/pretraineds/pretrained_v1/.gitkeep b/rvc/models/pretraineds/pretrained_v1/.gitkeep
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/rvc/models/pretraineds/pretrained_v2/.gitkeep b/rvc/models/pretraineds/pretrained_v2/.gitkeep
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/rvc/models/pretraineds/pretraineds_custom/.gitkeep b/rvc/models/pretraineds/pretraineds_custom/.gitkeep
new file mode 100644
index 0000000000000000000000000000000000000000..8b137891791fe96927ad78e64b0aad7bded08bdc
--- /dev/null
+++ b/rvc/models/pretraineds/pretraineds_custom/.gitkeep
@@ -0,0 +1 @@
+
diff --git a/rvc/train/data_utils.py b/rvc/train/data_utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..2013558ff4e662dc68ef815eabcaa2ea37aa72a1
--- /dev/null
+++ b/rvc/train/data_utils.py
@@ -0,0 +1,592 @@
+import os
+import numpy as np
+import torch
+import torch.utils.data
+
+from mel_processing import spectrogram_torch
+from utils import load_filepaths_and_text, load_wav_to_torch
+
+
+class TextAudioLoaderMultiNSFsid(torch.utils.data.Dataset):
+ """
+ Dataset that loads text and audio pairs.
+
+ Args:
+ hparams: Hyperparameters.
+ """
+
+ def __init__(self, hparams):
+ self.audiopaths_and_text = load_filepaths_and_text(hparams.training_files)
+ self.max_wav_value = hparams.max_wav_value
+ self.sample_rate = hparams.sample_rate
+ self.filter_length = hparams.filter_length
+ self.hop_length = hparams.hop_length
+ self.win_length = hparams.win_length
+ self.min_text_len = getattr(hparams, "min_text_len", 1)
+ self.max_text_len = getattr(hparams, "max_text_len", 5000)
+ self._filter()
+
+ def _filter(self):
+ """
+ Filters audio paths and text pairs based on text length.
+ """
+ audiopaths_and_text_new = []
+ lengths = []
+ for audiopath, text, pitch, pitchf, dv in self.audiopaths_and_text:
+ if self.min_text_len <= len(text) <= self.max_text_len:
+ audiopaths_and_text_new.append([audiopath, text, pitch, pitchf, dv])
+ lengths.append(os.path.getsize(audiopath) // (3 * self.hop_length))
+ self.audiopaths_and_text = audiopaths_and_text_new
+ self.lengths = lengths
+
+ def get_sid(self, sid):
+ """
+ Converts speaker ID to a LongTensor.
+
+ Args:
+ sid (str): Speaker ID.
+ """
+ try:
+ sid = torch.LongTensor([int(sid)])
+ except ValueError as error:
+ print(f"Error converting speaker ID '{sid}' to integer. Exception: {error}")
+ sid = torch.LongTensor([0])
+ return sid
+
+ def get_audio_text_pair(self, audiopath_and_text):
+ """
+ Loads and processes audio and text data for a single pair.
+
+ Args:
+ audiopath_and_text (list): List containing audio path, text, pitch, pitchf, and speaker ID.
+ """
+ file = audiopath_and_text[0]
+ phone = audiopath_and_text[1]
+ pitch = audiopath_and_text[2]
+ pitchf = audiopath_and_text[3]
+ dv = audiopath_and_text[4]
+
+ phone, pitch, pitchf = self.get_labels(phone, pitch, pitchf)
+ spec, wav = self.get_audio(file)
+ dv = self.get_sid(dv)
+
+ len_phone = phone.size()[0]
+ len_spec = spec.size()[-1]
+ if len_phone != len_spec:
+ len_min = min(len_phone, len_spec)
+ len_wav = len_min * self.hop_length
+
+ spec = spec[:, :len_min]
+ wav = wav[:, :len_wav]
+
+ phone = phone[:len_min, :]
+ pitch = pitch[:len_min]
+ pitchf = pitchf[:len_min]
+
+ return (spec, wav, phone, pitch, pitchf, dv)
+
+ def get_labels(self, phone, pitch, pitchf):
+ """
+ Loads and processes phoneme, pitch, and pitchf labels.
+
+ Args:
+ phone (str): Path to phoneme label file.
+ pitch (str): Path to pitch label file.
+ pitchf (str): Path to pitchf label file.
+ """
+ phone = np.load(phone)
+ phone = np.repeat(phone, 2, axis=0)
+ pitch = np.load(pitch)
+ pitchf = np.load(pitchf)
+ n_num = min(phone.shape[0], 900)
+ phone = phone[:n_num, :]
+ pitch = pitch[:n_num]
+ pitchf = pitchf[:n_num]
+ phone = torch.FloatTensor(phone)
+ pitch = torch.LongTensor(pitch)
+ pitchf = torch.FloatTensor(pitchf)
+ return phone, pitch, pitchf
+
+ def get_audio(self, filename):
+ """
+ Loads and processes audio data.
+
+ Args:
+ filename (str): Path to audio file.
+ """
+ audio, sample_rate = load_wav_to_torch(filename)
+ if sample_rate != self.sample_rate:
+ raise ValueError(
+ f"{sample_rate} SR doesn't match target {self.sample_rate} SR"
+ )
+ audio_norm = audio
+ audio_norm = audio_norm.unsqueeze(0)
+ spec_filename = filename.replace(".wav", ".spec.pt")
+ if os.path.exists(spec_filename):
+ try:
+ spec = torch.load(spec_filename)
+ except Exception as error:
+ print(f"An error occurred getting spec from {spec_filename}: {error}")
+ spec = spectrogram_torch(
+ audio_norm,
+ self.filter_length,
+ self.hop_length,
+ self.win_length,
+ center=False,
+ )
+ spec = torch.squeeze(spec, 0)
+ torch.save(spec, spec_filename, _use_new_zipfile_serialization=False)
+ else:
+ spec = spectrogram_torch(
+ audio_norm,
+ self.filter_length,
+ self.hop_length,
+ self.win_length,
+ center=False,
+ )
+ spec = torch.squeeze(spec, 0)
+ torch.save(spec, spec_filename, _use_new_zipfile_serialization=False)
+ return spec, audio_norm
+
+ def __getitem__(self, index):
+ """
+ Returns a single audio-text pair.
+
+ Args:
+ index (int): Index of the data sample.
+ """
+ return self.get_audio_text_pair(self.audiopaths_and_text[index])
+
+ def __len__(self):
+ """
+ Returns the length of the dataset.
+ """
+ return len(self.audiopaths_and_text)
+
+
+class TextAudioCollateMultiNSFsid:
+ """
+ Collates text and audio data for training.
+
+ Args:
+ return_ids (bool, optional): Whether to return sample IDs. Defaults to False.
+ """
+
+ def __init__(self, return_ids=False):
+ self.return_ids = return_ids
+
+ def __call__(self, batch):
+ """
+ Collates a batch of data samples.
+
+ Args:
+ batch (list): List of data samples.
+ """
+ _, ids_sorted_decreasing = torch.sort(
+ torch.LongTensor([x[0].size(1) for x in batch]), dim=0, descending=True
+ )
+
+ max_spec_len = max([x[0].size(1) for x in batch])
+ max_wave_len = max([x[1].size(1) for x in batch])
+ spec_lengths = torch.LongTensor(len(batch))
+ wave_lengths = torch.LongTensor(len(batch))
+ spec_padded = torch.FloatTensor(len(batch), batch[0][0].size(0), max_spec_len)
+ wave_padded = torch.FloatTensor(len(batch), 1, max_wave_len)
+ spec_padded.zero_()
+ wave_padded.zero_()
+
+ max_phone_len = max([x[2].size(0) for x in batch])
+ phone_lengths = torch.LongTensor(len(batch))
+ phone_padded = torch.FloatTensor(
+ len(batch), max_phone_len, batch[0][2].shape[1]
+ )
+ pitch_padded = torch.LongTensor(len(batch), max_phone_len)
+ pitchf_padded = torch.FloatTensor(len(batch), max_phone_len)
+ phone_padded.zero_()
+ pitch_padded.zero_()
+ pitchf_padded.zero_()
+ sid = torch.LongTensor(len(batch))
+
+ for i in range(len(ids_sorted_decreasing)):
+ row = batch[ids_sorted_decreasing[i]]
+
+ spec = row[0]
+ spec_padded[i, :, : spec.size(1)] = spec
+ spec_lengths[i] = spec.size(1)
+
+ wave = row[1]
+ wave_padded[i, :, : wave.size(1)] = wave
+ wave_lengths[i] = wave.size(1)
+
+ phone = row[2]
+ phone_padded[i, : phone.size(0), :] = phone
+ phone_lengths[i] = phone.size(0)
+
+ pitch = row[3]
+ pitch_padded[i, : pitch.size(0)] = pitch
+ pitchf = row[4]
+ pitchf_padded[i, : pitchf.size(0)] = pitchf
+
+ sid[i] = row[5]
+
+ return (
+ phone_padded,
+ phone_lengths,
+ pitch_padded,
+ pitchf_padded,
+ spec_padded,
+ spec_lengths,
+ wave_padded,
+ wave_lengths,
+ sid,
+ )
+
+
+class TextAudioLoader(torch.utils.data.Dataset):
+ """
+ Dataset that loads text and audio pairs.
+
+ Args:
+ hparams: Hyperparameters.
+ """
+
+ def __init__(self, hparams):
+ self.audiopaths_and_text = load_filepaths_and_text(hparams.training_files)
+ self.max_wav_value = hparams.max_wav_value
+ self.sample_rate = hparams.sample_rate
+ self.filter_length = hparams.filter_length
+ self.hop_length = hparams.hop_length
+ self.win_length = hparams.win_length
+ self.min_text_len = getattr(hparams, "min_text_len", 1)
+ self.max_text_len = getattr(hparams, "max_text_len", 5000)
+ self._filter()
+
+ def _filter(self):
+ """
+ Filters audio paths and text pairs based on text length.
+ """
+ audiopaths_and_text_new = []
+ lengths = []
+ for entry in self.audiopaths_and_text:
+ if len(entry) >= 3:
+ audiopath, text, dv = entry[:3]
+ if self.min_text_len <= len(text) <= self.max_text_len:
+ audiopaths_and_text_new.append([audiopath, text, dv])
+ lengths.append(os.path.getsize(audiopath) // (3 * self.hop_length))
+
+ self.audiopaths_and_text = audiopaths_and_text_new
+ self.lengths = lengths
+
+ def get_sid(self, sid):
+ """
+ Converts speaker ID to a LongTensor.
+
+ Args:
+ sid (str): Speaker ID.
+ """
+ try:
+ sid = torch.LongTensor([int(sid)])
+ except ValueError as error:
+ print(f"Error converting speaker ID '{sid}' to integer. Exception: {error}")
+ sid = torch.LongTensor([0])
+ return sid
+
+ def get_audio_text_pair(self, audiopath_and_text):
+ """
+ Loads and processes audio and text data for a single pair.
+
+ Args:
+ audiopath_and_text (list): List containing audio path, text, and speaker ID.
+ """
+ file = audiopath_and_text[0]
+ phone = audiopath_and_text[1]
+ dv = audiopath_and_text[2]
+
+ phone = self.get_labels(phone)
+ spec, wav = self.get_audio(file)
+ dv = self.get_sid(dv)
+
+ len_phone = phone.size()[0]
+ len_spec = spec.size()[-1]
+ if len_phone != len_spec:
+ len_min = min(len_phone, len_spec)
+ len_wav = len_min * self.hop_length
+ spec = spec[:, :len_min]
+ wav = wav[:, :len_wav]
+ phone = phone[:len_min, :]
+ return (spec, wav, phone, dv)
+
+ def get_labels(self, phone):
+ """
+ Loads and processes phoneme labels.
+
+ Args:
+ phone (str): Path to phoneme label file.
+ """
+ phone = np.load(phone)
+ phone = np.repeat(phone, 2, axis=0)
+ n_num = min(phone.shape[0], 900)
+ phone = phone[:n_num, :]
+ phone = torch.FloatTensor(phone)
+ return phone
+
+ def get_audio(self, filename):
+ """
+ Loads and processes audio data.
+
+ Args:
+ filename (str): Path to audio file.
+ """
+ audio, sample_rate = load_wav_to_torch(filename)
+ if sample_rate != self.sample_rate:
+ raise ValueError(
+ f"{sample_rate} SR doesn't match target {self.sample_rate} SR"
+ )
+ audio_norm = audio
+ audio_norm = audio_norm.unsqueeze(0)
+ spec_filename = filename.replace(".wav", ".spec.pt")
+ if os.path.exists(spec_filename):
+ try:
+ spec = torch.load(spec_filename)
+ except Exception as error:
+ print(f"An error occurred getting spec from {spec_filename}: {error}")
+ spec = spectrogram_torch(
+ audio_norm,
+ self.filter_length,
+ self.hop_length,
+ self.win_length,
+ center=False,
+ )
+ spec = torch.squeeze(spec, 0)
+ torch.save(spec, spec_filename, _use_new_zipfile_serialization=False)
+ else:
+ spec = spectrogram_torch(
+ audio_norm,
+ self.filter_length,
+ self.hop_length,
+ self.win_length,
+ center=False,
+ )
+ spec = torch.squeeze(spec, 0)
+ torch.save(spec, spec_filename, _use_new_zipfile_serialization=False)
+ return spec, audio_norm
+
+ def __getitem__(self, index):
+ """
+ Returns a single audio-text pair.
+
+ Args:
+ index (int): Index of the data sample.
+ """
+ return self.get_audio_text_pair(self.audiopaths_and_text[index])
+
+ def __len__(self):
+ """
+ Returns the length of the dataset.
+ """
+ return len(self.audiopaths_and_text)
+
+
+class TextAudioCollate:
+ """
+ Collates text and audio data for training.
+
+ Args:
+ return_ids (bool, optional): Whether to return sample IDs. Defaults to False.
+ """
+
+ def __init__(self, return_ids=False):
+ self.return_ids = return_ids
+
+ def __call__(self, batch):
+ """
+ Collates a batch of data samples.
+
+ Args:
+ batch (list): List of data samples.
+ """
+ _, ids_sorted_decreasing = torch.sort(
+ torch.LongTensor([x[0].size(1) for x in batch]), dim=0, descending=True
+ )
+
+ max_spec_len = max([x[0].size(1) for x in batch])
+ max_wave_len = max([x[1].size(1) for x in batch])
+ spec_lengths = torch.LongTensor(len(batch))
+ wave_lengths = torch.LongTensor(len(batch))
+ spec_padded = torch.FloatTensor(len(batch), batch[0][0].size(0), max_spec_len)
+ wave_padded = torch.FloatTensor(len(batch), 1, max_wave_len)
+ spec_padded.zero_()
+ wave_padded.zero_()
+
+ max_phone_len = max([x[2].size(0) for x in batch])
+ phone_lengths = torch.LongTensor(len(batch))
+ phone_padded = torch.FloatTensor(
+ len(batch), max_phone_len, batch[0][2].shape[1]
+ )
+ phone_padded.zero_()
+ sid = torch.LongTensor(len(batch))
+
+ for i in range(len(ids_sorted_decreasing)):
+ row = batch[ids_sorted_decreasing[i]]
+
+ spec = row[0]
+ spec_padded[i, :, : spec.size(1)] = spec
+ spec_lengths[i] = spec.size(1)
+
+ wave = row[1]
+ wave_padded[i, :, : wave.size(1)] = wave
+ wave_lengths[i] = wave.size(1)
+
+ phone = row[2]
+ phone_padded[i, : phone.size(0), :] = phone
+ phone_lengths[i] = phone.size(0)
+
+ sid[i] = row[3]
+
+ return (
+ phone_padded,
+ phone_lengths,
+ spec_padded,
+ spec_lengths,
+ wave_padded,
+ wave_lengths,
+ sid,
+ )
+
+
+class DistributedBucketSampler(torch.utils.data.distributed.DistributedSampler):
+ """
+ Distributed sampler that groups data into buckets based on length.
+
+ Args:
+ dataset (torch.utils.data.Dataset): Dataset to sample from.
+ batch_size (int): Batch size.
+ boundaries (list): List of length boundaries for buckets.
+ num_replicas (int, optional): Number of processes participating in distributed training. Defaults to None.
+ rank (int, optional): Rank of the current process. Defaults to None.
+ shuffle (bool, optional): Whether to shuffle the data. Defaults to True.
+ """
+
+ def __init__(
+ self,
+ dataset,
+ batch_size,
+ boundaries,
+ num_replicas=None,
+ rank=None,
+ shuffle=True,
+ ):
+ super().__init__(dataset, num_replicas=num_replicas, rank=rank, shuffle=shuffle)
+ self.lengths = dataset.lengths
+ self.batch_size = batch_size
+ self.boundaries = boundaries
+
+ self.buckets, self.num_samples_per_bucket = self._create_buckets()
+ self.total_size = sum(self.num_samples_per_bucket)
+ self.num_samples = self.total_size // self.num_replicas
+
+ def _create_buckets(self):
+ """
+ Creates buckets of data samples based on length.
+ """
+ buckets = [[] for _ in range(len(self.boundaries) - 1)]
+ for i in range(len(self.lengths)):
+ length = self.lengths[i]
+ idx_bucket = self._bisect(length)
+ if idx_bucket != -1:
+ buckets[idx_bucket].append(i)
+
+ for i in range(len(buckets) - 1, -1, -1):  # iterate in reverse so pops don't shift pending indices
+ if len(buckets[i]) == 0:
+ buckets.pop(i)
+ self.boundaries.pop(i + 1)
+
+ num_samples_per_bucket = []
+ for i in range(len(buckets)):
+ len_bucket = len(buckets[i])
+ total_batch_size = self.num_replicas * self.batch_size
+ rem = (
+ total_batch_size - (len_bucket % total_batch_size)
+ ) % total_batch_size
+ num_samples_per_bucket.append(len_bucket + rem)
+ return buckets, num_samples_per_bucket
+
+ def __iter__(self):
+ """
+ Iterates over batches of data samples.
+ """
+ g = torch.Generator()
+ g.manual_seed(self.epoch)
+
+ indices = []
+ if self.shuffle:
+ for bucket in self.buckets:
+ indices.append(torch.randperm(len(bucket), generator=g).tolist())
+ else:
+ for bucket in self.buckets:
+ indices.append(list(range(len(bucket))))
+
+ batches = []
+ for i in range(len(self.buckets)):
+ bucket = self.buckets[i]
+ len_bucket = len(bucket)
+ ids_bucket = indices[i]
+ num_samples_bucket = self.num_samples_per_bucket[i]
+
+ rem = num_samples_bucket - len_bucket
+ ids_bucket = (
+ ids_bucket
+ + ids_bucket * (rem // len_bucket)
+ + ids_bucket[: (rem % len_bucket)]
+ )
+
+ ids_bucket = ids_bucket[self.rank :: self.num_replicas]
+
+ # batching
+ for j in range(len(ids_bucket) // self.batch_size):
+ batch = [
+ bucket[idx]
+ for idx in ids_bucket[
+ j * self.batch_size : (j + 1) * self.batch_size
+ ]
+ ]
+ batches.append(batch)
+
+ if self.shuffle:
+ batch_ids = torch.randperm(len(batches), generator=g).tolist()
+ batches = [batches[i] for i in batch_ids]
+ self.batches = batches
+
+ assert len(self.batches) * self.batch_size == self.num_samples
+ return iter(self.batches)
+
+ def _bisect(self, x, lo=0, hi=None):
+ """
+ Performs binary search to find the bucket index for a given length.
+
+ Args:
+ x (int): Length to find the bucket for.
+ lo (int, optional): Lower bound of the search range. Defaults to 0.
+ hi (int, optional): Upper bound of the search range. Defaults to None.
+ """
+ if hi is None:
+ hi = len(self.boundaries) - 1
+
+ if hi > lo:
+ mid = (hi + lo) // 2
+ if self.boundaries[mid] < x and x <= self.boundaries[mid + 1]:
+ return mid
+ elif x <= self.boundaries[mid]:
+ return self._bisect(x, lo, mid)
+ else:
+ return self._bisect(x, mid + 1, hi)
+ else:
+ return -1
+
+ def __len__(self):
+ """
+ Returns the length of the sampler.
+ """
+ return self.num_samples // self.batch_size
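
The three pieces above are consumed together by the trainer: the dataset yields variable-length samples, the sampler groups indices into length-homogeneous batches, and the collate class pads each batch. A single-process wiring sketch; `hparams` and the bucket `boundaries` here are illustrative assumptions, not values from this diff:

```python
# Sketch: wiring the dataset, bucket sampler, and collate fn together.
from torch.utils.data import DataLoader

dataset = TextAudioLoaderMultiNSFsid(hparams)  # hparams must carry the fields read in __init__
sampler = DistributedBucketSampler(
    dataset,
    batch_size=4,
    boundaries=[100, 200, 300, 400, 500, 600, 700, 800, 900],  # spec-frame buckets
    num_replicas=1,  # single process; under DDP these come from the process group
    rank=0,
    shuffle=True,
)
loader = DataLoader(
    dataset,
    batch_sampler=sampler,  # the sampler already yields whole batches of indices
    collate_fn=TextAudioCollateMultiNSFsid(),
    num_workers=2,
)
phone, phone_len, pitch, pitchf, spec, spec_len, wave, wave_len, sid = next(iter(loader))
```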
diff --git a/rvc/train/extract/extract.py b/rvc/train/extract/extract.py
new file mode 100644
index 0000000000000000000000000000000000000000..606b461532d0b0d2a096884ce83c2986e598d73e
--- /dev/null
+++ b/rvc/train/extract/extract.py
@@ -0,0 +1,286 @@
+import os
+import sys
+import glob
+import time
+import tqdm
+import torch
+import torchcrepe
+import numpy as np
+import concurrent.futures
+import multiprocessing as mp
+
+# Zluda
+if torch.cuda.is_available() and torch.cuda.get_device_name().endswith("[ZLUDA]"):
+ torch.backends.cudnn.enabled = False
+ torch.backends.cuda.enable_flash_sdp(False)
+ torch.backends.cuda.enable_math_sdp(True)
+ torch.backends.cuda.enable_mem_efficient_sdp(False)
+
+now_dir = os.getcwd()
+sys.path.append(os.path.join(now_dir))
+
+from rvc.lib.utils import load_audio, load_embedding
+from rvc.train.extract.preparing_files import generate_config, generate_filelist
+from rvc.lib.predictors.RMVPE import RMVPE0Predictor
+from rvc.configs.config import Config
+
+# Load config
+config = Config()
+
+mp.set_start_method("spawn", force=True)
+
+
+class FeatureInput:
+ """Class for F0 extraction."""
+
+ def __init__(self, sample_rate=16000, hop_size=160, device="cpu"):
+ self.fs = sample_rate
+ self.hop = hop_size
+ self.f0_bin = 256
+ self.f0_max = 1100.0
+ self.f0_min = 50.0
+ self.f0_mel_min = 1127 * np.log(1 + self.f0_min / 700)
+ self.f0_mel_max = 1127 * np.log(1 + self.f0_max / 700)
+ self.device = device
+ self.model_rmvpe = None
+
+ def compute_f0(self, np_arr, f0_method, hop_length):
+ """Extract F0 using the specified method."""
+ if f0_method == "crepe":
+ return self.get_crepe(np_arr, hop_length)
+ elif f0_method == "rmvpe":
+ return self.model_rmvpe.infer_from_audio(np_arr, thred=0.03)
+ else:
+ raise ValueError(f"Unknown F0 method: {f0_method}")
+
+ def get_crepe(self, x, hop_length):
+ """Extract F0 using CREPE."""
+ audio = torch.from_numpy(x.astype(np.float32)).to(self.device)
+ audio /= torch.quantile(torch.abs(audio), 0.999)
+ audio = audio.unsqueeze(0)
+ pitch = torchcrepe.predict(
+ audio,
+ self.fs,
+ hop_length,
+ self.f0_min,
+ self.f0_max,
+ "full",
+ batch_size=hop_length * 2,
+ device=audio.device,
+ pad=True,
+ )
+ source = pitch.squeeze(0).cpu().float().numpy()
+ source[source < 0.001] = np.nan
+ target = np.interp(
+ np.arange(0, len(source) * (x.size // self.hop), len(source))
+ / (x.size // self.hop),
+ np.arange(0, len(source)),
+ source,
+ )
+ return np.nan_to_num(target)
+
+ def coarse_f0(self, f0):
+ """Convert F0 to coarse F0."""
+ f0_mel = 1127 * np.log(1 + f0 / 700)
+ f0_mel = np.clip(
+ (f0_mel - self.f0_mel_min)
+ * (self.f0_bin - 2)
+ / (self.f0_mel_max - self.f0_mel_min)
+ + 1,
+ 1,
+ self.f0_bin - 1,
+ )
+ return np.rint(f0_mel).astype(int)
+
+ def process_file(self, file_info, f0_method, hop_length):
+ """Process a single audio file for F0 extraction."""
+ inp_path, opt_path1, opt_path2, _ = file_info
+
+ if os.path.exists(opt_path1) and os.path.exists(opt_path2):
+ return
+
+ try:
+ np_arr = load_audio(inp_path, 16000)
+ feature_pit = self.compute_f0(np_arr, f0_method, hop_length)
+ np.save(opt_path2, feature_pit, allow_pickle=False)
+ coarse_pit = self.coarse_f0(feature_pit)
+ np.save(opt_path1, coarse_pit, allow_pickle=False)
+ except Exception as error:
+ print(
+ f"An error occurred extracting file {inp_path} on {self.device}: {error}"
+ )
+
+ def process_files(
+ self, files, f0_method, hop_length, device_num, device, n_threads
+ ):
+ """Process multiple files."""
+ self.device = device
+ if f0_method == "rmvpe":
+ self.model_rmvpe = RMVPE0Predictor(
+ os.path.join("rvc", "models", "predictors", "rmvpe.pt"),
+ is_half=False,
+ device=device,
+ )
+ else:
+ n_threads = 1
+
+ n_threads = 1 if n_threads == 0 else n_threads
+
+ def process_file_wrapper(file_info):
+ self.process_file(file_info, f0_method, hop_length)
+
+ with tqdm.tqdm(total=len(files), leave=True, position=device_num) as pbar:
+ # using multi-threading
+ with concurrent.futures.ThreadPoolExecutor(
+ max_workers=n_threads
+ ) as executor:
+ futures = [
+ executor.submit(process_file_wrapper, file_info)
+ for file_info in files
+ ]
+ for future in concurrent.futures.as_completed(futures):
+ pbar.update(1)
+
+
+def run_pitch_extraction(files, devices, f0_method, hop_length, num_processes):
+ devices_str = ", ".join(devices)
+ print(
+ f"Starting pitch extraction with {num_processes} cores on {devices_str} using {f0_method}..."
+ )
+ start_time = time.time()
+ fe = FeatureInput()
+ # split the task between devices
+ ps = []
+ num_devices = len(devices)
+ for i, device in enumerate(devices):
+ p = mp.Process(
+ target=fe.process_files,
+ args=(
+ files[i::num_devices],
+ f0_method,
+ hop_length,
+ i,
+ device,
+ num_processes // num_devices,
+ ),
+ )
+ ps.append(p)
+ p.start()
+ for p in ps:
+ p.join()
+
+ elapsed_time = time.time() - start_time
+ print(f"Pitch extraction completed in {elapsed_time:.2f} seconds.")
+
+
+def process_file_embedding(
+ files, version, embedder_model, embedder_model_custom, device_num, device, n_threads
+):
+ dtype = torch.float16 if config.is_half and "cuda" in device else torch.float32
+ model = load_embedding(embedder_model, embedder_model_custom).to(dtype).to(device)
+ n_threads = 1 if n_threads == 0 else n_threads
+
+ def process_file_embedding_wrapper(file_info):
+ wav_file_path, _, _, out_file_path = file_info
+ if os.path.exists(out_file_path):
+ return
+ feats = torch.from_numpy(load_audio(wav_file_path, 16000)).to(dtype).to(device)
+ feats = feats.view(1, -1)
+ with torch.no_grad():
+ feats = model(feats)["last_hidden_state"]
+ feats = (
+ model.final_proj(feats[0]).unsqueeze(0) if version == "v1" else feats
+ )
+ feats = feats.squeeze(0).float().cpu().numpy()
+ if not np.isnan(feats).any():
+ np.save(out_file_path, feats, allow_pickle=False)
+ else:
+ print(f"{file} contains NaN values and will be skipped.")
+
+ with tqdm.tqdm(total=len(files), leave=True, position=device_num) as pbar:
+ # using multi-threading
+ with concurrent.futures.ThreadPoolExecutor(max_workers=n_threads) as executor:
+ futures = [
+ executor.submit(process_file_embedding_wrapper, file_info)
+ for file_info in files
+ ]
+ for future in concurrent.futures.as_completed(futures):
+ try:
+ future.result()
+ except Exception as error:
+ print(f"An error occurred during embedding extraction: {error}")
+ pbar.update(1)
+
+
+def run_embedding_extraction(
+ files, devices, version, embedder_model, embedder_model_custom, num_processes
+):
+ start_time = time.time()
+ devices_str = ", ".join(devices)
+ print(
+ f"Starting embedding extraction with {num_processes} cores on {devices_str}..."
+ )
+ # split the task between devices
+ ps = []
+ num_devices = len(devices)
+ for i, device in enumerate(devices):
+ p = mp.Process(
+ target=process_file_embedding,
+ args=(
+ files[i::num_devices],
+ version,
+ embedder_model,
+ embedder_model_custom,
+ i,
+ device,
+ num_processes // num_devices,
+ ),
+ )
+ ps.append(p)
+ p.start()
+ for p in ps:
+ p.join()
+ elapsed_time = time.time() - start_time
+ print(f"Embedding extraction completed in {elapsed_time:.2f} seconds.")
+
+
+if __name__ == "__main__":
+
+ exp_dir = sys.argv[1]
+ f0_method = sys.argv[2]
+ hop_length = int(sys.argv[3])
+ num_processes = int(sys.argv[4])
+ gpus = sys.argv[5]
+ version = sys.argv[6]
+ pitch_guidance = sys.argv[7].lower() in ("true", "1")  # argv values are strings; "False" would otherwise be truthy
+ sample_rate = sys.argv[8]
+ embedder_model = sys.argv[9]
+ embedder_model_custom = sys.argv[10] if len(sys.argv) > 10 else None
+
+ # prep
+ wav_path = os.path.join(exp_dir, "sliced_audios_16k")
+ os.makedirs(os.path.join(exp_dir, "f0"), exist_ok=True)
+ os.makedirs(os.path.join(exp_dir, "f0_voiced"), exist_ok=True)
+ os.makedirs(os.path.join(exp_dir, version + "_extracted"), exist_ok=True)
+
+ files = []
+ for file in glob.glob(os.path.join(wav_path, "*.wav")):
+ file_name = os.path.basename(file)
+ file_info = [
+ file, # full path to sliced 16k wav
+ os.path.join(exp_dir, "f0", file_name + ".npy"),
+ os.path.join(exp_dir, "f0_voiced", file_name + ".npy"),
+ os.path.join(
+ exp_dir, version + "_extracted", file_name.replace("wav", "npy")
+ ),
+ ]
+ files.append(file_info)
+
+ devices = ["cpu"] if gpus == "-" else [f"cuda:{idx}" for idx in gpus.split("-")]
+ # Run Pitch Extraction
+ run_pitch_extraction(files, devices, f0_method, hop_length, num_processes)
+
+ # Run Embedding Extraction
+ run_embedding_extraction(
+ files, devices, version, embedder_model, embedder_model_custom, num_processes
+ )
+
+ # Run Preparing Files
+ generate_config(version, sample_rate, exp_dir)
+ generate_filelist(pitch_guidance, exp_dir, version, sample_rate)
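
`FeatureInput.coarse_f0` quantizes F0 onto 255 usable bins along the mel scale, m = 1127 * ln(1 + f / 700), so equal bin widths correspond to roughly equal perceived pitch steps. A standalone check of that mapping (printed values are approximate):

```python
# Standalone check of the mel-scale F0 quantization in FeatureInput.coarse_f0.
import numpy as np

f0_min, f0_max, f0_bin = 50.0, 1100.0, 256
mel_min = 1127 * np.log(1 + f0_min / 700)  # ~77.8
mel_max = 1127 * np.log(1 + f0_max / 700)  # ~1064.5

def coarse(f0_hz):
    f0_mel = 1127 * np.log(1 + np.asarray(f0_hz, dtype=float) / 700)
    scaled = (f0_mel - mel_min) * (f0_bin - 2) / (mel_max - mel_min) + 1
    return np.rint(np.clip(scaled, 1, f0_bin - 1)).astype(int)

print(coarse([50.0, 220.0, 1100.0]))  # -> [  1  60 255]: the range maps onto bins 1..255
```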
diff --git a/rvc/train/extract/preparing_files.py b/rvc/train/extract/preparing_files.py
new file mode 100644
index 0000000000000000000000000000000000000000..3956eb0c898ab1fc1c1e998689a3a9ce34830db3
--- /dev/null
+++ b/rvc/train/extract/preparing_files.py
@@ -0,0 +1,69 @@
+import os
+import shutil
+from random import shuffle
+from rvc.configs.config import Config
+
+config = Config()
+current_directory = os.getcwd()
+
+
+def generate_config(rvc_version: str, sample_rate: int, model_path: str):
+ config_path = os.path.join("rvc", "configs", rvc_version, f"{sample_rate}.json")
+ config_save_path = os.path.join(model_path, "config.json")
+ if not os.path.exists(config_save_path):
+ shutil.copyfile(config_path, config_save_path)
+
+
+def generate_filelist(
+ pitch_guidance: bool, model_path: str, rvc_version: str, sample_rate: int
+):
+ gt_wavs_dir = os.path.join(model_path, "sliced_audios")
+ feature_dir = os.path.join(model_path, f"{rvc_version}_extracted")
+
+ f0_dir, f0nsf_dir = None, None
+ if pitch_guidance:
+ f0_dir = os.path.join(model_path, "f0")
+ f0nsf_dir = os.path.join(model_path, "f0_voiced")
+
+ gt_wavs_files = set(name.split(".")[0] for name in os.listdir(gt_wavs_dir))
+ feature_files = set(name.split(".")[0] for name in os.listdir(feature_dir))
+
+ if pitch_guidance:
+ f0_files = set(name.split(".")[0] for name in os.listdir(f0_dir))
+ f0nsf_files = set(name.split(".")[0] for name in os.listdir(f0nsf_dir))
+ names = gt_wavs_files & feature_files & f0_files & f0nsf_files
+ else:
+ names = gt_wavs_files & feature_files
+
+ options = []
+ mute_base_path = os.path.join(current_directory, "logs", "mute")
+
+ for name in names:
+ if pitch_guidance:
+ options.append(
+ f"{gt_wavs_dir}/{name}.wav|{feature_dir}/{name}.npy|{f0_dir}/{name}.wav.npy|{f0nsf_dir}/{name}.wav.npy|0"
+ )
+ else:
+ options.append(f"{gt_wavs_dir}/{name}.wav|{feature_dir}/{name}.npy|0")
+
+ mute_audio_path = os.path.join(
+ mute_base_path, "sliced_audios", f"mute{sample_rate}.wav"
+ )
+ mute_feature_path = os.path.join(
+ mute_base_path, f"{rvc_version}_extracted", "mute.npy"
+ )
+
+ for _ in range(2):
+ if pitch_guidance:
+ mute_f0_path = os.path.join(mute_base_path, "f0", "mute.wav.npy")
+ mute_f0nsf_path = os.path.join(mute_base_path, "f0_voiced", "mute.wav.npy")
+ options.append(
+ f"{mute_audio_path}|{mute_feature_path}|{mute_f0_path}|{mute_f0nsf_path}|0"
+ )
+ else:
+ options.append(f"{mute_audio_path}|{mute_feature_path}|0")
+
+ shuffle(options)
+
+ with open(os.path.join(model_path, "filelist.txt"), "w") as f:
+ f.write("\n".join(options))
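+
+
+# With pitch guidance enabled, each filelist row is pipe-separated
+# (hypothetical paths for illustration):
+#   logs/model/sliced_audios/0_0.wav|logs/model/v2_extracted/0_0.npy|logs/model/f0/0_0.wav.npy|logs/model/f0_voiced/0_0.wav.npy|0
+# The trailing 0 is the speaker id.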
diff --git a/rvc/train/losses.py b/rvc/train/losses.py
new file mode 100644
index 0000000000000000000000000000000000000000..5d6642a66ef3e66445e1608ac2bcfa4dd333ca59
--- /dev/null
+++ b/rvc/train/losses.py
@@ -0,0 +1,84 @@
+import torch
+
+
+def feature_loss(fmap_r, fmap_g):
+ """
+ Compute the feature loss between reference and generated feature maps.
+
+ Args:
+ fmap_r (list of torch.Tensor): List of reference feature maps.
+ fmap_g (list of torch.Tensor): List of generated feature maps.
+ """
+ loss = 0
+ for dr, dg in zip(fmap_r, fmap_g):
+ for rl, gl in zip(dr, dg):
+ rl = rl.float().detach()
+ gl = gl.float()
+ loss += torch.mean(torch.abs(rl - gl))
+
+ return loss * 2
+
+
+def discriminator_loss(disc_real_outputs, disc_generated_outputs):
+ """
+ Compute the discriminator loss for real and generated outputs.
+
+ Args:
+ disc_real_outputs (list of torch.Tensor): List of discriminator outputs for real samples.
+ disc_generated_outputs (list of torch.Tensor): List of discriminator outputs for generated samples.
+ """
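+    # Least-squares GAN (LSGAN) objective: real outputs are pushed toward 1,
+    # generated outputs toward 0.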
+ loss = 0
+ r_losses = []
+ g_losses = []
+ for dr, dg in zip(disc_real_outputs, disc_generated_outputs):
+ dr = dr.float()
+ dg = dg.float()
+ r_loss = torch.mean((1 - dr) ** 2)
+ g_loss = torch.mean(dg**2)
+ loss += r_loss + g_loss
+ r_losses.append(r_loss.item())
+ g_losses.append(g_loss.item())
+
+ return loss, r_losses, g_losses
+
+
+def generator_loss(disc_outputs):
+ """
+ Compute the generator loss based on discriminator outputs.
+
+ Args:
+ disc_outputs (list of torch.Tensor): List of discriminator outputs for generated samples.
+ """
+ loss = 0
+ gen_losses = []
+ for dg in disc_outputs:
+ dg = dg.float()
+ l = torch.mean((1 - dg) ** 2)
+ gen_losses.append(l)
+ loss += l
+
+ return loss, gen_losses
+
+
+def kl_loss(z_p, logs_q, m_p, logs_p, z_mask):
+ """
+ Compute the Kullback-Leibler divergence loss.
+
+ Args:
+ z_p (torch.Tensor): Latent variable z_p [b, h, t_t].
+ logs_q (torch.Tensor): Log variance of q [b, h, t_t].
+ m_p (torch.Tensor): Mean of p [b, h, t_t].
+ logs_p (torch.Tensor): Log variance of p [b, h, t_t].
+ z_mask (torch.Tensor): Mask for the latent variables [b, h, t_t].
+ """
+ z_p = z_p.float()
+ logs_q = logs_q.float()
+ m_p = m_p.float()
+ logs_p = logs_p.float()
+ z_mask = z_mask.float()
+
+ kl = logs_p - logs_q - 0.5
+ kl += 0.5 * ((z_p - m_p) ** 2) * torch.exp(-2.0 * logs_p)
+ kl = torch.sum(kl * z_mask)
+ l = kl / torch.sum(z_mask)
+ return l
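+
+
+# The masked KL term above follows the VITS formulation:
+#   kl = logs_p - logs_q - 0.5 + 0.5 * (z_p - m_p)^2 * exp(-2 * logs_p)
+# summed over valid positions (z_mask) and normalized by the mask sum.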
diff --git a/rvc/train/mel_processing.py b/rvc/train/mel_processing.py
new file mode 100644
index 0000000000000000000000000000000000000000..1f5e8f38b72f0cd9ec89ba0b725860f512d2f607
--- /dev/null
+++ b/rvc/train/mel_processing.py
@@ -0,0 +1,152 @@
+import torch
+import torch.utils.data
+from librosa.filters import mel as librosa_mel_fn
+
+
+def dynamic_range_compression_torch(x, C=1, clip_val=1e-5):
+ """
+    Dynamic range compression using the natural logarithm.
+
+ Args:
+ x (torch.Tensor): Input tensor.
+ C (float, optional): Scaling factor. Defaults to 1.
+ clip_val (float, optional): Minimum value for clamping. Defaults to 1e-5.
+ """
+ return torch.log(torch.clamp(x, min=clip_val) * C)
+
+
+def dynamic_range_decompression_torch(x, C=1):
+ """
+ Dynamic range decompression using exp.
+
+ Args:
+ x (torch.Tensor): Input tensor.
+ C (float, optional): Scaling factor. Defaults to 1.
+ """
+ return torch.exp(x) / C
+
+
+def spectral_normalize_torch(magnitudes):
+ """
+ Spectral normalization using dynamic range compression.
+
+ Args:
+ magnitudes (torch.Tensor): Magnitude spectrogram.
+ """
+ return dynamic_range_compression_torch(magnitudes)
+
+
+def spectral_de_normalize_torch(magnitudes):
+ """
+ Spectral de-normalization using dynamic range decompression.
+
+ Args:
+ magnitudes (torch.Tensor): Normalized spectrogram.
+ """
+ return dynamic_range_decompression_torch(magnitudes)
+
+
+mel_basis = {}
+hann_window = {}
+
+
+def spectrogram_torch(y, n_fft, hop_size, win_size, center=False):
+ """
+ Compute the spectrogram of a signal using STFT.
+
+ Args:
+ y (torch.Tensor): Input signal.
+ n_fft (int): FFT window size.
+ hop_size (int): Hop size between frames.
+ win_size (int): Window size.
+ center (bool, optional): Whether to center the window. Defaults to False.
+ """
+ global hann_window
+ dtype_device = str(y.dtype) + "_" + str(y.device)
+ wnsize_dtype_device = str(win_size) + "_" + dtype_device
+ if wnsize_dtype_device not in hann_window:
+ hann_window[wnsize_dtype_device] = torch.hann_window(win_size).to(
+ dtype=y.dtype, device=y.device
+ )
+
+ y = torch.nn.functional.pad(
+ y.unsqueeze(1),
+ (int((n_fft - hop_size) / 2), int((n_fft - hop_size) / 2)),
+ mode="reflect",
+ )
+ y = y.squeeze(1)
+
+    # ZLUDA workaround: fall back to the CPU for the FFT, since the HIP SDK has no cuFFT equivalent
+ source_device = y.device
+ if y.device.type == "cuda" and torch.cuda.get_device_name().endswith("[ZLUDA]"):
+ y = y.to("cpu")
+ hann_window[wnsize_dtype_device] = hann_window[wnsize_dtype_device].to("cpu")
+
+ spec = torch.stft(
+ y,
+ n_fft,
+ hop_length=hop_size,
+ win_length=win_size,
+ window=hann_window[wnsize_dtype_device],
+ center=center,
+ pad_mode="reflect",
+ normalized=False,
+ onesided=True,
+ return_complex=True,
+ ).to(source_device)
+
+ spec = torch.sqrt(spec.real.pow(2) + spec.imag.pow(2) + 1e-6)
+
+ return spec
+
+
+def spec_to_mel_torch(spec, n_fft, num_mels, sample_rate, fmin, fmax):
+ """
+ Convert a spectrogram to a mel-spectrogram.
+
+ Args:
+ spec (torch.Tensor): Magnitude spectrogram.
+ n_fft (int): FFT window size.
+ num_mels (int): Number of mel frequency bins.
+ sample_rate (int): Sampling rate of the audio signal.
+ fmin (float): Minimum frequency.
+ fmax (float): Maximum frequency.
+ """
+ global mel_basis
+ dtype_device = str(spec.dtype) + "_" + str(spec.device)
+ fmax_dtype_device = str(fmax) + "_" + dtype_device
+ if fmax_dtype_device not in mel_basis:
+ mel = librosa_mel_fn(
+ sr=sample_rate, n_fft=n_fft, n_mels=num_mels, fmin=fmin, fmax=fmax
+ )
+ mel_basis[fmax_dtype_device] = torch.from_numpy(mel).to(
+ dtype=spec.dtype, device=spec.device
+ )
+
+ melspec = torch.matmul(mel_basis[fmax_dtype_device], spec)
+ melspec = spectral_normalize_torch(melspec)
+ return melspec
+
+
+def mel_spectrogram_torch(
+ y, n_fft, num_mels, sample_rate, hop_size, win_size, fmin, fmax, center=False
+):
+ """
+ Compute the mel-spectrogram of a signal.
+
+ Args:
+ y (torch.Tensor): Input signal.
+ n_fft (int): FFT window size.
+ num_mels (int): Number of mel frequency bins.
+ sample_rate (int): Sampling rate of the audio signal.
+ hop_size (int): Hop size between frames.
+ win_size (int): Window size.
+ fmin (float): Minimum frequency.
+ fmax (float): Maximum frequency.
+ center (bool, optional): Whether to center the window. Defaults to False.
+ """
+ spec = spectrogram_torch(y, n_fft, hop_size, win_size, center)
+
+ melspec = spec_to_mel_torch(spec, n_fft, num_mels, sample_rate, fmin, fmax)
+
+ return melspec
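+
+
+# Minimal usage sketch (parameter values are illustrative assumptions, not the
+# project's training config):
+#   y = torch.randn(1, 16000)  # 1 s of 16 kHz audio, shape (batch, samples)
+#   mel = mel_spectrogram_torch(y, n_fft=1024, num_mels=80, sample_rate=16000,
+#                               hop_size=256, win_size=1024, fmin=0.0, fmax=8000.0)
+#   # mel has shape (batch, num_mels, frames)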
diff --git a/rvc/train/preprocess/preprocess.py b/rvc/train/preprocess/preprocess.py
new file mode 100644
index 0000000000000000000000000000000000000000..15b3a151ccda45782238d91cf256c876129b6f4f
--- /dev/null
+++ b/rvc/train/preprocess/preprocess.py
@@ -0,0 +1,223 @@
+import os
+import sys
+import time
+from scipy import signal
+from scipy.io import wavfile
+import numpy as np
+import concurrent.futures
+from tqdm import tqdm
+import json
+from distutils.util import strtobool
+import librosa
+import multiprocessing
+
+now_directory = os.getcwd()
+sys.path.append(now_directory)
+
+from rvc.lib.utils import load_audio
+from rvc.train.preprocess.slicer import Slicer
+
+# Remove colab logs
+import logging
+
+logging.getLogger("numba.core.byteflow").setLevel(logging.WARNING)
+logging.getLogger("numba.core.ssa").setLevel(logging.WARNING)
+logging.getLogger("numba.core.interpreter").setLevel(logging.WARNING)
+
+# Constants
+OVERLAP = 0.3
+MAX_AMPLITUDE = 0.9
+ALPHA = 0.75
+HIGH_PASS_CUTOFF = 48
+SAMPLE_RATE_16K = 16000
+
+
+class PreProcess:
+ def __init__(self, sr: int, exp_dir: str, per: float):
+ self.slicer = Slicer(
+ sr=sr,
+ threshold=-42,
+ min_length=1500,
+ min_interval=400,
+ hop_size=15,
+ max_sil_kept=500,
+ )
+ self.sr = sr
+ self.b_high, self.a_high = signal.butter(
+ N=5, Wn=HIGH_PASS_CUTOFF, btype="high", fs=self.sr
+ )
+ self.per = per
+ self.exp_dir = exp_dir
+ self.device = "cpu"
+ self.gt_wavs_dir = os.path.join(exp_dir, "sliced_audios")
+ self.wavs16k_dir = os.path.join(exp_dir, "sliced_audios_16k")
+ os.makedirs(self.gt_wavs_dir, exist_ok=True)
+ os.makedirs(self.wavs16k_dir, exist_ok=True)
+
+ def _normalize_audio(self, audio: np.ndarray):
+ tmp_max = np.abs(audio).max()
+ if tmp_max > 2.5:
+ return None
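+        # Soft peak normalization: blend the peak-normalized signal (scaled to
+        # MAX_AMPLITUDE * ALPHA) with the raw signal weighted by (1 - ALPHA).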
+ return (audio / tmp_max * (MAX_AMPLITUDE * ALPHA)) + (1 - ALPHA) * audio
+
+ def process_audio_segment(
+ self,
+ audio_segment: np.ndarray,
+ idx0: int,
+ idx1: int,
+ process_effects: bool,
+ ):
+ normalized_audio = (
+ self._normalize_audio(audio_segment) if process_effects else audio_segment
+ )
+ if normalized_audio is None:
+ print(f"{idx0}-{idx1}-filtered")
+ return
+ wavfile.write(
+ os.path.join(self.gt_wavs_dir, f"{idx0}_{idx1}.wav"),
+ self.sr,
+ normalized_audio.astype(np.float32),
+ )
+ audio_16k = librosa.resample(
+ normalized_audio, orig_sr=self.sr, target_sr=SAMPLE_RATE_16K
+ )
+ wavfile.write(
+ os.path.join(self.wavs16k_dir, f"{idx0}_{idx1}.wav"),
+ SAMPLE_RATE_16K,
+ audio_16k.astype(np.float32),
+ )
+
+ def process_audio(
+ self,
+ path: str,
+ idx0: int,
+ cut_preprocess: bool,
+ process_effects: bool,
+ ):
+ audio_length = 0
+ try:
+ audio = load_audio(path, self.sr)
+ audio_length = librosa.get_duration(y=audio, sr=self.sr)
+ if process_effects:
+ audio = signal.lfilter(self.b_high, self.a_high, audio)
+ idx1 = 0
+ if cut_preprocess:
+ for audio_segment in self.slicer.slice(audio):
+ i = 0
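+                    # Window each sliced segment into chunks of `per` seconds,
+                    # stepping by (per - OVERLAP) seconds so chunks overlap.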
+ while True:
+ start = int(self.sr * (self.per - OVERLAP) * i)
+ i += 1
+ if len(audio_segment[start:]) > (self.per + OVERLAP) * self.sr:
+ tmp_audio = audio_segment[
+ start : start + int(self.per * self.sr)
+ ]
+ self.process_audio_segment(
+ tmp_audio, idx0, idx1, process_effects
+ )
+ idx1 += 1
+ else:
+ tmp_audio = audio_segment[start:]
+ self.process_audio_segment(
+ tmp_audio, idx0, idx1, process_effects
+ )
+ idx1 += 1
+ break
+ else:
+ self.process_audio_segment(audio, idx0, idx1, process_effects)
+ except Exception as error:
+ print(f"Error processing audio: {error}")
+ return audio_length
+
+
+def format_duration(seconds):
+ hours = int(seconds // 3600)
+ minutes = int((seconds % 3600) // 60)
+ seconds = int(seconds % 60)
+ return f"{hours:02}:{minutes:02}:{seconds:02}"
+
+
+def save_dataset_duration(file_path, dataset_duration):
+ try:
+ with open(file_path, "r") as f:
+ data = json.load(f)
+ except FileNotFoundError:
+ data = {}
+
+ formatted_duration = format_duration(dataset_duration)
+ new_data = {
+ "total_dataset_duration": formatted_duration,
+ "total_seconds": dataset_duration,
+ }
+ data.update(new_data)
+
+ with open(file_path, "w") as f:
+ json.dump(data, f, indent=4)
+
+
+def process_audio_wrapper(args):
+ pp, file, cut_preprocess, process_effects = args
+ file_path, idx0 = file
+ return pp.process_audio(file_path, idx0, cut_preprocess, process_effects)
+
+
+def preprocess_training_set(
+ input_root: str,
+ sr: int,
+ num_processes: int,
+ exp_dir: str,
+ per: float,
+ cut_preprocess: bool,
+ process_effects: bool,
+):
+ start_time = time.time()
+ pp = PreProcess(sr, exp_dir, per)
+ print(f"Starting preprocess with {num_processes} processes...")
+
+ files = [
+ (os.path.join(input_root, f), idx)
+ for idx, f in enumerate(os.listdir(input_root))
+ if f.lower().endswith((".wav", ".mp3", ".flac", ".ogg"))
+ ]
+ # print(f"Number of files: {len(files)}")
+ with concurrent.futures.ThreadPoolExecutor(max_workers=num_processes) as executor:
+ audio_length = list(
+ tqdm(
+ executor.map(
+ process_audio_wrapper,
+ [(pp, file, cut_preprocess, process_effects) for file in files],
+ ),
+ total=len(files),
+ )
+ )
+ audio_length = sum(audio_length)
+ save_dataset_duration(
+ os.path.join(exp_dir, "model_info.json"), dataset_duration=audio_length
+ )
+ elapsed_time = time.time() - start_time
+    print(
+        f"Preprocess completed in {elapsed_time:.2f} seconds on {format_duration(audio_length)} of audio."
+    )
+
+
+if __name__ == "__main__":
+ experiment_directory = str(sys.argv[1])
+ input_root = str(sys.argv[2])
+ sample_rate = int(sys.argv[3])
+ percentage = float(sys.argv[4])
+ num_processes = sys.argv[5]
+ if num_processes.lower() == "none":
+ num_processes = multiprocessing.cpu_count()
+ else:
+ num_processes = int(num_processes)
+ cut_preprocess = strtobool(sys.argv[6])
+ process_effects = strtobool(sys.argv[7])
+
+ preprocess_training_set(
+ input_root,
+ sample_rate,
+ num_processes,
+ experiment_directory,
+ percentage,
+ cut_preprocess,
+ process_effects,
+ )
diff --git a/rvc/train/preprocess/slicer.py b/rvc/train/preprocess/slicer.py
new file mode 100644
index 0000000000000000000000000000000000000000..09c4f9a556dee5e8ef506115ccf3ace328ffaaa6
--- /dev/null
+++ b/rvc/train/preprocess/slicer.py
@@ -0,0 +1,235 @@
+import numpy as np
+
+
+class Slicer:
+ """
+ A class for slicing audio waveforms into segments based on silence detection.
+
+ Attributes:
+ sr (int): Sampling rate of the audio waveform.
+ threshold (float): RMS threshold for silence detection, in dB.
+ min_length (int): Minimum length of a segment, in milliseconds.
+ min_interval (int): Minimum interval between segments, in milliseconds.
+ hop_size (int): Hop size for RMS calculation, in milliseconds.
+ max_sil_kept (int): Maximum length of silence to keep at the beginning or end of a segment, in milliseconds.
+
+ Methods:
+ slice(waveform): Slices the given waveform into segments.
+ """
+
+ def __init__(
+ self,
+ sr: int,
+ threshold: float = -40.0,
+ min_length: int = 5000,
+ min_interval: int = 300,
+ hop_size: int = 20,
+ max_sil_kept: int = 5000,
+ ):
+ """
+ Initializes a Slicer object.
+
+ Args:
+ sr (int): Sampling rate of the audio waveform.
+ threshold (float, optional): RMS threshold for silence detection, in dB. Defaults to -40.0.
+ min_length (int, optional): Minimum length of a segment, in milliseconds. Defaults to 5000.
+ min_interval (int, optional): Minimum interval between segments, in milliseconds. Defaults to 300.
+ hop_size (int, optional): Hop size for RMS calculation, in milliseconds. Defaults to 20.
+ max_sil_kept (int, optional): Maximum length of silence to keep at the beginning or end of a segment, in milliseconds. Defaults to 5000.
+
+ Raises:
+ ValueError: If the input parameters are not valid.
+ """
+ if not min_length >= min_interval >= hop_size:
+ raise ValueError("min_length >= min_interval >= hop_size is required")
+ if not max_sil_kept >= hop_size:
+ raise ValueError("max_sil_kept >= hop_size is required")
+
+ # Convert time-based parameters to sample-based parameters
+ min_interval = sr * min_interval / 1000
+ self.threshold = 10 ** (threshold / 20.0)
+ self.hop_size = round(sr * hop_size / 1000)
+ self.win_size = min(round(min_interval), 4 * self.hop_size)
+ self.min_length = round(sr * min_length / 1000 / self.hop_size)
+ self.min_interval = round(min_interval / self.hop_size)
+ self.max_sil_kept = round(sr * max_sil_kept / 1000 / self.hop_size)
+
+ def _apply_slice(self, waveform, begin, end):
+ """
+ Applies a slice to the waveform.
+
+ Args:
+ waveform (numpy.ndarray): The waveform to slice.
+ begin (int): Start frame index.
+ end (int): End frame index.
+ """
+ start_idx = begin * self.hop_size
+ if len(waveform.shape) > 1:
+ end_idx = min(waveform.shape[1], end * self.hop_size)
+ return waveform[:, start_idx:end_idx]
+ else:
+ end_idx = min(waveform.shape[0], end * self.hop_size)
+ return waveform[start_idx:end_idx]
+
+ def slice(self, waveform):
+ """
+ Slices the given waveform into segments.
+
+ Args:
+ waveform (numpy.ndarray): The waveform to slice.
+ """
+ # Calculate RMS for each frame
+ samples = waveform.mean(axis=0) if len(waveform.shape) > 1 else waveform
+ if samples.shape[0] <= self.min_length:
+ return [waveform]
+
+ rms_list = get_rms(
+ y=samples, frame_length=self.win_size, hop_length=self.hop_size
+ ).squeeze(0)
+
+ # Detect silence segments and mark them
+ sil_tags = []
+ silence_start, clip_start = None, 0
+ for i, rms in enumerate(rms_list):
+ # If current frame is silent
+ if rms < self.threshold:
+ if silence_start is None:
+ silence_start = i
+ continue
+
+ # If current frame is not silent
+ if silence_start is None:
+ continue
+
+ # Check if current silence segment is leading silence or need to slice
+ is_leading_silence = silence_start == 0 and i > self.max_sil_kept
+ need_slice_middle = (
+ i - silence_start >= self.min_interval
+ and i - clip_start >= self.min_length
+ )
+
+ # If not leading silence and not need to slice middle
+ if not is_leading_silence and not need_slice_middle:
+ silence_start = None
+ continue
+
+ # Handle different cases of silence segments
+ if i - silence_start <= self.max_sil_kept:
+ # Short silence
+ pos = rms_list[silence_start : i + 1].argmin() + silence_start
+ if silence_start == 0:
+ sil_tags.append((0, pos))
+ else:
+ sil_tags.append((pos, pos))
+ clip_start = pos
+ elif i - silence_start <= self.max_sil_kept * 2:
+ # Medium silence
+ pos = rms_list[
+ i - self.max_sil_kept : silence_start + self.max_sil_kept + 1
+ ].argmin()
+ pos += i - self.max_sil_kept
+ pos_l = (
+ rms_list[
+ silence_start : silence_start + self.max_sil_kept + 1
+ ].argmin()
+ + silence_start
+ )
+ pos_r = (
+ rms_list[i - self.max_sil_kept : i + 1].argmin()
+ + i
+ - self.max_sil_kept
+ )
+ if silence_start == 0:
+ sil_tags.append((0, pos_r))
+ clip_start = pos_r
+ else:
+ sil_tags.append((min(pos_l, pos), max(pos_r, pos)))
+ clip_start = max(pos_r, pos)
+ else:
+ # Long silence
+ pos_l = (
+ rms_list[
+ silence_start : silence_start + self.max_sil_kept + 1
+ ].argmin()
+ + silence_start
+ )
+ pos_r = (
+ rms_list[i - self.max_sil_kept : i + 1].argmin()
+ + i
+ - self.max_sil_kept
+ )
+ if silence_start == 0:
+ sil_tags.append((0, pos_r))
+ else:
+ sil_tags.append((pos_l, pos_r))
+ clip_start = pos_r
+ silence_start = None
+
+ # Handle trailing silence
+ total_frames = rms_list.shape[0]
+ if (
+ silence_start is not None
+ and total_frames - silence_start >= self.min_interval
+ ):
+ silence_end = min(total_frames, silence_start + self.max_sil_kept)
+ pos = rms_list[silence_start : silence_end + 1].argmin() + silence_start
+ sil_tags.append((pos, total_frames + 1))
+
+ # Extract segments based on silence tags
+ if not sil_tags:
+ return [waveform]
+ else:
+ chunks = []
+ if sil_tags[0][0] > 0:
+ chunks.append(self._apply_slice(waveform, 0, sil_tags[0][0]))
+
+ for i in range(len(sil_tags) - 1):
+ chunks.append(
+ self._apply_slice(waveform, sil_tags[i][1], sil_tags[i + 1][0])
+ )
+
+ if sil_tags[-1][1] < total_frames:
+ chunks.append(
+ self._apply_slice(waveform, sil_tags[-1][1], total_frames)
+ )
+
+ return chunks
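+
+# Usage sketch (illustrative values mirroring the preprocessing defaults above):
+#   slicer = Slicer(sr=40000, threshold=-42.0, min_length=1500,
+#                   min_interval=400, hop_size=15, max_sil_kept=500)
+#   segments = slicer.slice(audio)  # audio: mono np.ndarray, or (channels, samples)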
+
+
+def get_rms(
+ y,
+ frame_length=2048,
+ hop_length=512,
+ pad_mode="constant",
+):
+ """
+ Calculates the root mean square (RMS) of a waveform.
+
+ Args:
+ y (numpy.ndarray): The waveform.
+ frame_length (int, optional): The length of the frame in samples. Defaults to 2048.
+ hop_length (int, optional): The hop length between frames in samples. Defaults to 512.
+ pad_mode (str, optional): The padding mode used for the waveform. Defaults to "constant".
+ """
+ padding = (int(frame_length // 2), int(frame_length // 2))
+ y = np.pad(y, padding, mode=pad_mode)
+
+ axis = -1
+ out_strides = y.strides + tuple([y.strides[axis]])
+ x_shape_trimmed = list(y.shape)
+ x_shape_trimmed[axis] -= frame_length - 1
+ out_shape = tuple(x_shape_trimmed) + tuple([frame_length])
+ xw = np.lib.stride_tricks.as_strided(y, shape=out_shape, strides=out_strides)
+
+ if axis < 0:
+ target_axis = axis - 1
+ else:
+ target_axis = axis + 1
+
+ xw = np.moveaxis(xw, -1, target_axis)
+ slices = [slice(None)] * xw.ndim
+ slices[axis] = slice(0, None, hop_length)
+ x = xw[tuple(slices)]
+
+ power = np.mean(np.abs(x) ** 2, axis=-2, keepdims=True)
+ return np.sqrt(power)
diff --git a/rvc/train/process/change_info.py b/rvc/train/process/change_info.py
new file mode 100644
index 0000000000000000000000000000000000000000..6e5f680b33eec70388744dbc1e58fbd24e2a02d8
--- /dev/null
+++ b/rvc/train/process/change_info.py
@@ -0,0 +1,14 @@
+import os
+import torch
+
+
+def change_info(path, info, name):
+ try:
+ ckpt = torch.load(path, map_location="cpu")
+ ckpt["info"] = info
+ if name == "":
+ name = os.path.basename(path)
+ torch.save(ckpt, f"logs/{name}/{name}")
+ return "Success."
+    except Exception as error:
+        message = f"An error occurred changing the info: {error}"
+        print(message)
+        return message
diff --git a/rvc/train/process/extract_index.py b/rvc/train/process/extract_index.py
new file mode 100644
index 0000000000000000000000000000000000000000..bdbd779fbe0c2b639d1c4fc521f90ca661deb1f3
--- /dev/null
+++ b/rvc/train/process/extract_index.py
@@ -0,0 +1,88 @@
+import os
+import sys
+import faiss
+import numpy as np
+from sklearn.cluster import MiniBatchKMeans
+from multiprocessing import cpu_count
+
+# Parse command line arguments
+exp_dir = str(sys.argv[1])
+version = str(sys.argv[2])
+index_algorithm = str(sys.argv[3])
+
+try:
+ feature_dir = os.path.join(exp_dir, f"{version}_extracted")
+ model_name = os.path.basename(exp_dir)
+
+ index_filename_added = f"added_{model_name}_{version}.index"
+ index_filepath_added = os.path.join(exp_dir, index_filename_added)
+
+ index_filename_trained = f"trained_{model_name}_{version}.index"
+ index_filepath_trained = os.path.join(exp_dir, index_filename_trained)
+
+    if not (
+        os.path.exists(index_filepath_trained)
+        and os.path.exists(index_filepath_added)
+    ):
+ npys = []
+ listdir_res = sorted(os.listdir(feature_dir))
+
+ for name in listdir_res:
+ file_path = os.path.join(feature_dir, name)
+ phone = np.load(file_path)
+ npys.append(phone)
+
+ big_npy = np.concatenate(npys, axis=0)
+
+ big_npy_idx = np.arange(big_npy.shape[0])
+ np.random.shuffle(big_npy_idx)
+ big_npy = big_npy[big_npy_idx]
+
+ if big_npy.shape[0] > 2e5 and (
+ index_algorithm == "Auto" or index_algorithm == "KMeans"
+ ):
+ big_npy = (
+ MiniBatchKMeans(
+ n_clusters=10000,
+ verbose=True,
+ batch_size=256 * cpu_count(),
+ compute_labels=False,
+ init="random",
+ )
+ .fit(big_npy)
+ .cluster_centers_
+ )
+
+ np.save(os.path.join(exp_dir, "total_fea.npy"), big_npy)
+
+ n_ivf = min(int(16 * np.sqrt(big_npy.shape[0])), big_npy.shape[0] // 39)
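+        # Heuristic for the number of IVF cells: ~16 * sqrt(N), capped so each
+        # cell is trained on at least ~39 vectors (faiss's default minimum
+        # points per centroid).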
+
+ # index_trained
+ index_trained = faiss.index_factory(
+ 256 if version == "v1" else 768, f"IVF{n_ivf},Flat"
+ )
+ index_ivf_trained = faiss.extract_index_ivf(index_trained)
+ index_ivf_trained.nprobe = 1
+ index_trained.train(big_npy)
+
+ faiss.write_index(index_trained, index_filepath_trained)
+
+ # index_added
+ index_added = faiss.index_factory(
+ 256 if version == "v1" else 768, f"IVF{n_ivf},Flat"
+ )
+ index_ivf_added = faiss.extract_index_ivf(index_added)
+ index_ivf_added.nprobe = 1
+ index_added.train(big_npy)
+
+ batch_size_add = 8192
+ for i in range(0, big_npy.shape[0], batch_size_add):
+ index_added.add(big_npy[i : i + batch_size_add])
+
+ faiss.write_index(index_added, index_filepath_added)
+ print(f"Saved index file '{index_filepath_added}'")
+
+except Exception as error:
+ print(f"An error occurred extracting the index: {error}")
+    print(
+        "If you are running this code in a virtual environment, make sure you have enough GPU memory available to generate the index file."
+    )
diff --git a/rvc/train/process/extract_model.py b/rvc/train/process/extract_model.py
new file mode 100644
index 0000000000000000000000000000000000000000..d4abde9fdba8fad1ab5f97410e96fbe993160777
--- /dev/null
+++ b/rvc/train/process/extract_model.py
@@ -0,0 +1,122 @@
+import os, sys
+import torch
+import hashlib
+import datetime
+from collections import OrderedDict
+import json
+
+now_dir = os.getcwd()
+sys.path.append(now_dir)
+
+
+def replace_keys_in_dict(d, old_key_part, new_key_part):
+ if isinstance(d, OrderedDict):
+ updated_dict = OrderedDict()
+ else:
+ updated_dict = {}
+ for key, value in d.items():
+ new_key = key.replace(old_key_part, new_key_part)
+ if isinstance(value, dict):
+ value = replace_keys_in_dict(value, old_key_part, new_key_part)
+ updated_dict[new_key] = value
+ return updated_dict
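+
+# Example: a weight-norm key written by newer PyTorch, such as
+# "dec.ups.0.parametrizations.weight.original0", is rewritten to
+# "dec.ups.0.weight_g" (and "...original1" to "...weight_v") so that older
+# loaders keep working; the module path here is illustrative.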
+
+
+def extract_model(
+ ckpt,
+ sr,
+ pitch_guidance,
+ name,
+ model_dir,
+ epoch,
+ step,
+ version,
+ hps,
+ overtrain_info,
+):
+ try:
+ print(f"Saved model '{model_dir}' (epoch {epoch} and step {step})")
+
+ model_dir_path = os.path.dirname(model_dir)
+ os.makedirs(model_dir_path, exist_ok=True)
+
+ if "best_epoch" in model_dir:
+ pth_file = f"{name}_{epoch}e_{step}s_best_epoch.pth"
+ else:
+ pth_file = f"{name}_{epoch}e_{step}s.pth"
+
+ pth_file_old_version_path = os.path.join(
+ model_dir_path, f"{pth_file}_old_version.pth"
+ )
+
+ if os.path.exists(os.path.join(model_dir_path, "model_info.json")):
+ with open(os.path.join(model_dir_path, "model_info.json"), "r") as f:
+ data = json.load(f)
+ dataset_lenght = data.get("total_dataset_duration", None)
+ else:
+ dataset_lenght = None
+
+ with open(os.path.join(now_dir, "assets", "config.json"), "r") as f:
+ data = json.load(f)
+ model_author = data.get("model_author", None)
+
+ opt = OrderedDict(
+ weight={
+ key: value.half() for key, value in ckpt.items() if "enc_q" not in key
+ }
+ )
+ opt["config"] = [
+ hps.data.filter_length // 2 + 1,
+ 32,
+ hps.model.inter_channels,
+ hps.model.hidden_channels,
+ hps.model.filter_channels,
+ hps.model.n_heads,
+ hps.model.n_layers,
+ hps.model.kernel_size,
+ hps.model.p_dropout,
+ hps.model.resblock,
+ hps.model.resblock_kernel_sizes,
+ hps.model.resblock_dilation_sizes,
+ hps.model.upsample_rates,
+ hps.model.upsample_initial_channel,
+ hps.model.upsample_kernel_sizes,
+ hps.model.spk_embed_dim,
+ hps.model.gin_channels,
+ hps.data.sample_rate,
+ ]
+
+ opt["epoch"] = epoch
+ opt["step"] = step
+ opt["sr"] = sr
+ opt["f0"] = pitch_guidance
+ opt["version"] = version
+ opt["creation_date"] = datetime.datetime.now().isoformat()
+
+ hash_input = f"{str(ckpt)} {epoch} {step} {datetime.datetime.now().isoformat()}"
+ model_hash = hashlib.sha256(hash_input.encode()).hexdigest()
+ opt["model_hash"] = model_hash
+ opt["overtrain_info"] = overtrain_info
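+        # (sic) "dataset_lenght" is kept misspelled on purpose:
+        # model_information.py reads the same key.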
+ opt["dataset_lenght"] = dataset_lenght
+ opt["model_name"] = name
+ opt["author"] = model_author
+
+ torch.save(opt, os.path.join(model_dir_path, pth_file))
+
+ model = torch.load(model_dir, map_location=torch.device("cpu"))
+ torch.save(
+ replace_keys_in_dict(
+ replace_keys_in_dict(
+ model, ".parametrizations.weight.original1", ".weight_v"
+ ),
+ ".parametrizations.weight.original0",
+ ".weight_g",
+ ),
+ pth_file_old_version_path,
+ )
+ os.remove(model_dir)
+ os.rename(pth_file_old_version_path, model_dir)
+
+ except Exception as error:
+ print(f"An error occurred extracting the model: {error}")
diff --git a/rvc/train/process/extract_small_model.py b/rvc/train/process/extract_small_model.py
new file mode 100644
index 0000000000000000000000000000000000000000..7b0f484d795a283dde0cb8b50d6b5e9317ff9e7a
--- /dev/null
+++ b/rvc/train/process/extract_small_model.py
@@ -0,0 +1,183 @@
+import os
+import torch
+import hashlib
+import datetime
+from collections import OrderedDict
+
+
+def replace_keys_in_dict(d, old_key_part, new_key_part):
+ # Use OrderedDict if the original is an OrderedDict
+ if isinstance(d, OrderedDict):
+ updated_dict = OrderedDict()
+ else:
+ updated_dict = {}
+ for key, value in d.items():
+ # Replace the key part if found
+ new_key = key.replace(old_key_part, new_key_part)
+ # If the value is a dictionary, apply the function recursively
+ if isinstance(value, dict):
+ value = replace_keys_in_dict(value, old_key_part, new_key_part)
+ updated_dict[new_key] = value
+ return updated_dict
+
+
+def extract_small_model(
+ path: str,
+ name: str,
+ sr: int,
+ pitch_guidance: bool,
+ version: str,
+ epoch: int,
+ step: int,
+):
+    try:
+        ckpt = torch.load(path, map_location="cpu")
+        pth_file = f"{name}.pth"
+        pth_file_old_version_path = os.path.join("logs", f"{pth_file}_old_version.pth")
+        # Unwrap full training checkpoints first; calling .half() on
+        # non-tensor entries (e.g. optimizer state) would raise otherwise.
+        if "model" in ckpt:
+            ckpt = ckpt["model"]
+        opt = OrderedDict(
+            weight={
+                key: value.half() for key, value in ckpt.items() if "enc_q" not in key
+            }
+        )
+ if sr == "40000":
+ opt["config"] = [
+ 1025,
+ 32,
+ 192,
+ 192,
+ 768,
+ 2,
+ 6,
+ 3,
+ 0,
+ "1",
+ [3, 7, 11],
+ [[1, 3, 5], [1, 3, 5], [1, 3, 5]],
+ [10, 10, 2, 2],
+ 512,
+ [16, 16, 4, 4],
+ 109,
+ 256,
+ 40000,
+ ]
+ elif sr == "48000":
+ if version == "v1":
+ opt["config"] = [
+ 1025,
+ 32,
+ 192,
+ 192,
+ 768,
+ 2,
+ 6,
+ 3,
+ 0,
+ "1",
+ [3, 7, 11],
+ [[1, 3, 5], [1, 3, 5], [1, 3, 5]],
+ [10, 6, 2, 2, 2],
+ 512,
+ [16, 16, 4, 4, 4],
+ 109,
+ 256,
+ 48000,
+ ]
+ else:
+ opt["config"] = [
+ 1025,
+ 32,
+ 192,
+ 192,
+ 768,
+ 2,
+ 6,
+ 3,
+ 0,
+ "1",
+ [3, 7, 11],
+ [[1, 3, 5], [1, 3, 5], [1, 3, 5]],
+ [12, 10, 2, 2],
+ 512,
+ [24, 20, 4, 4],
+ 109,
+ 256,
+ 48000,
+ ]
+ elif sr == "32000":
+ if version == "v1":
+ opt["config"] = [
+ 513,
+ 32,
+ 192,
+ 192,
+ 768,
+ 2,
+ 6,
+ 3,
+ 0,
+ "1",
+ [3, 7, 11],
+ [[1, 3, 5], [1, 3, 5], [1, 3, 5]],
+ [10, 4, 2, 2, 2],
+ 512,
+ [16, 16, 4, 4, 4],
+ 109,
+ 256,
+ 32000,
+ ]
+ else:
+ opt["config"] = [
+ 513,
+ 32,
+ 192,
+ 192,
+ 768,
+ 2,
+ 6,
+ 3,
+ 0,
+ "1",
+ [3, 7, 11],
+ [[1, 3, 5], [1, 3, 5], [1, 3, 5]],
+ [10, 8, 2, 2],
+ 512,
+ [20, 16, 4, 4],
+ 109,
+ 256,
+ 32000,
+ ]
+
+ opt["epoch"] = epoch
+ opt["step"] = step
+ opt["sr"] = sr
+ opt["f0"] = int(pitch_guidance)
+ opt["version"] = version
+ opt["creation_date"] = datetime.datetime.now().isoformat()
+
+ hash_input = f"{str(ckpt)} {epoch} {step} {datetime.datetime.now().isoformat()}"
+ model_hash = hashlib.sha256(hash_input.encode()).hexdigest()
+ opt["model_hash"] = model_hash
+
+        # Save the trimmed checkpoint, then rewrite the weight-norm
+        # parametrization keys back to the legacy names.
+        torch.save(opt, pth_file)
+        model = torch.load(pth_file, map_location=torch.device("cpu"))
+        torch.save(
+            replace_keys_in_dict(
+                replace_keys_in_dict(
+                    model, ".parametrizations.weight.original1", ".weight_v"
+                ),
+                ".parametrizations.weight.original0",
+                ".weight_g",
+            ),
+            pth_file_old_version_path,
+        )
+        os.remove(pth_file)
+        os.rename(pth_file_old_version_path, pth_file)
+ except Exception as error:
+ print(f"An error occurred extracting the model: {error}")
diff --git a/rvc/train/process/model_blender.py b/rvc/train/process/model_blender.py
new file mode 100644
index 0000000000000000000000000000000000000000..e08105d5f597269f8f1e44f4099d20c4d33bd1df
--- /dev/null
+++ b/rvc/train/process/model_blender.py
@@ -0,0 +1,68 @@
+import os
+import torch
+from collections import OrderedDict
+
+
+def extract(ckpt):
+ a = ckpt["model"]
+ opt = OrderedDict()
+ opt["weight"] = {}
+ for key in a.keys():
+ if "enc_q" in key:
+ continue
+ opt["weight"][key] = a[key]
+ return opt
+
+
+def model_blender(name, path1, path2, ratio):
+ try:
+        message = f"Models {path1} and {path2} were merged with alpha {ratio}."
+ ckpt1 = torch.load(path1, map_location="cpu")
+ ckpt2 = torch.load(path2, map_location="cpu")
+
+ if ckpt1["sr"] != ckpt2["sr"]:
+ return "The sample rates of the two models are not the same."
+
+ cfg = ckpt1["config"]
+ cfg_f0 = ckpt1["f0"]
+ cfg_version = ckpt1["version"]
+ cfg_sr = ckpt1["sr"]
+
+ if "model" in ckpt1:
+ ckpt1 = extract(ckpt1)
+ else:
+ ckpt1 = ckpt1["weight"]
+ if "model" in ckpt2:
+ ckpt2 = extract(ckpt2)
+ else:
+ ckpt2 = ckpt2["weight"]
+
+ if sorted(list(ckpt1.keys())) != sorted(list(ckpt2.keys())):
+            return "Failed to merge the models. The model architectures are not the same."
+
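+        # Linear blend: w = ratio * w1 + (1 - ratio) * w2 for every shared
+        # tensor, stored back in half precision.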
+ opt = OrderedDict()
+ opt["weight"] = {}
+ for key in ckpt1.keys():
+ if key == "emb_g.weight" and ckpt1[key].shape != ckpt2[key].shape:
+ min_shape0 = min(ckpt1[key].shape[0], ckpt2[key].shape[0])
+ opt["weight"][key] = (
+ ratio * (ckpt1[key][:min_shape0].float())
+ + (1 - ratio) * (ckpt2[key][:min_shape0].float())
+ ).half()
+ else:
+ opt["weight"][key] = (
+ ratio * (ckpt1[key].float()) + (1 - ratio) * (ckpt2[key].float())
+ ).half()
+
+ opt["config"] = cfg
+ opt["sr"] = cfg_sr
+ opt["f0"] = cfg_f0
+ opt["version"] = cfg_version
+ opt["info"] = message
+
+ torch.save(opt, os.path.join("logs", f"{name}.pth"))
+ print(message)
+ return message, os.path.join("logs", f"{name}.pth")
+    except Exception as error:
+        message = f"An error occurred blending the models: {error}"
+        print(message)
+        return message
diff --git a/rvc/train/process/model_information.py b/rvc/train/process/model_information.py
new file mode 100644
index 0000000000000000000000000000000000000000..d5e6051b6a3f4ac53644f16508d72eafedfad0f1
--- /dev/null
+++ b/rvc/train/process/model_information.py
@@ -0,0 +1,48 @@
+import torch
+from datetime import datetime
+
+
+def prettify_date(date_str):
+ if date_str is None:
+ return "None"
+ try:
+ date_time_obj = datetime.strptime(date_str, "%Y-%m-%dT%H:%M:%S.%f")
+ return date_time_obj.strftime("%Y-%m-%d %H:%M:%S")
+ except ValueError:
+ return "Invalid date format"
+
+
+def model_information(path):
+ model_data = torch.load(path, map_location="cpu")
+
+ print(f"Loaded model from {path}")
+
+ model_name = model_data.get("model_name", "None")
+ epochs = model_data.get("epoch", "None")
+ steps = model_data.get("step", "None")
+ sr = model_data.get("sr", "None")
+ f0 = model_data.get("f0", "None")
+ dataset_lenght = model_data.get("dataset_lenght", "None")
+ version = model_data.get("version", "None")
+ creation_date = model_data.get("creation_date", "None")
+ model_hash = model_data.get("model_hash", None)
+ overtrain_info = model_data.get("overtrain_info", "None")
+ model_author = model_data.get("author", "None")
+
+ pitch_guidance = "True" if f0 == 1 else "False"
+
+ creation_date_str = prettify_date(creation_date) if creation_date else "None"
+
+ return (
+ f"Model Name: {model_name}\n"
+ f"Model Creator: {model_author}\n"
+ f"Epochs: {epochs}\n"
+ f"Steps: {steps}\n"
+ f"Model Architecture: {version}\n"
+ f"Sampling Rate: {sr}\n"
+ f"Pitch Guidance: {pitch_guidance}\n"
+ f"Dataset Length: {dataset_lenght}\n"
+ f"Creation Date: {creation_date_str}\n"
+ f"Hash (ID): {model_hash}\n"
+ f"Overtrain Info: {overtrain_info}"
+ )
diff --git a/rvc/train/train.py b/rvc/train/train.py
new file mode 100644
index 0000000000000000000000000000000000000000..d6ec9434bc873ae948f9d67dad2cf5b55b486a54
--- /dev/null
+++ b/rvc/train/train.py
@@ -0,0 +1,1057 @@
+import torch
+import sys
+import os
+import datetime
+import glob
+import json
+import re
+from distutils.util import strtobool
+
+from utils import (
+ HParams,
+ plot_spectrogram_to_numpy,
+ summarize,
+ load_checkpoint,
+ save_checkpoint,
+ latest_checkpoint_path,
+)
+from random import randint, shuffle
+from time import sleep
+from time import time as ttime
+from tqdm import tqdm
+
+from torch.cuda.amp import GradScaler, autocast
+
+from torch.nn import functional as F
+from torch.nn.parallel import DistributedDataParallel as DDP
+from torch.utils.data import DataLoader
+from torch.utils.tensorboard import SummaryWriter
+import torch.distributed as dist
+import torch.multiprocessing as mp
+
+now_dir = os.getcwd()
+sys.path.append(os.path.join(now_dir))
+
+from data_utils import (
+ DistributedBucketSampler,
+ TextAudioCollate,
+ TextAudioCollateMultiNSFsid,
+ TextAudioLoader,
+ TextAudioLoaderMultiNSFsid,
+)
+
+from losses import (
+ discriminator_loss,
+ feature_loss,
+ generator_loss,
+ kl_loss,
+)
+from mel_processing import mel_spectrogram_torch, spec_to_mel_torch
+
+from rvc.train.process.extract_model import extract_model
+
+from rvc.lib.algorithm import commons
+from rvc.lib.algorithm.discriminators import MultiPeriodDiscriminator
+from rvc.lib.algorithm.discriminators import MultiPeriodDiscriminatorV2
+from rvc.lib.algorithm.synthesizers import Synthesizer
+
+# Parse command line arguments
+model_name = sys.argv[1]
+save_every_epoch = int(sys.argv[2])
+total_epoch = int(sys.argv[3])
+pretrainG = sys.argv[4]
+pretrainD = sys.argv[5]
+version = sys.argv[6]
+gpus = sys.argv[7]
+batch_size = int(sys.argv[8])
+sample_rate = int(sys.argv[9])
+pitch_guidance = strtobool(sys.argv[10])
+save_only_latest = strtobool(sys.argv[11])
+save_every_weights = strtobool(sys.argv[12])
+cache_data_in_gpu = strtobool(sys.argv[13])
+overtraining_detector = strtobool(sys.argv[14])
+overtraining_threshold = int(sys.argv[15])
+sync_graph = strtobool(sys.argv[16])
+
+current_dir = os.getcwd()
+experiment_dir = os.path.join(current_dir, "logs", model_name)
+config_save_path = os.path.join(experiment_dir, "config.json")
+dataset_path = os.path.join(experiment_dir, "sliced_audios")
+
+with open(config_save_path, "r") as f:
+ config = json.load(f)
+config = HParams(**config)
+config.data.training_files = os.path.join(experiment_dir, "filelist.txt")
+
+os.environ["CUDA_VISIBLE_DEVICES"] = gpus.replace("-", ",")
+n_gpus = len(gpus.split("-"))
+
+torch.backends.cudnn.deterministic = False
+torch.backends.cudnn.benchmark = False
+
+global_step = 0
+last_loss_gen_all = 0
+overtrain_save_epoch = 0
+loss_gen_history = []
+smoothed_loss_gen_history = []
+loss_disc_history = []
+smoothed_loss_disc_history = []
+lowest_value = {"step": 0, "value": float("inf"), "epoch": 0}
+training_file_path = os.path.join(experiment_dir, "training_data.json")
+overtrain_info = None
+
+import logging
+
+logging.getLogger("torch").setLevel(logging.ERROR)
+
+
+class EpochRecorder:
+ """
+ Records the time elapsed per epoch.
+ """
+
+ def __init__(self):
+ self.last_time = ttime()
+
+ def record(self):
+ """
+ Records the elapsed time and returns a formatted string.
+ """
+ now_time = ttime()
+ elapsed_time = now_time - self.last_time
+ self.last_time = now_time
+ elapsed_time = round(elapsed_time, 1)
+ elapsed_time_str = str(datetime.timedelta(seconds=int(elapsed_time)))
+ current_time = datetime.datetime.now().strftime("%H:%M:%S")
+ return f"time={current_time} | training_speed={elapsed_time_str}"
+
+
+def main():
+ """
+ Main function to start the training process.
+ """
+ global training_file_path, last_loss_gen_all, smoothed_loss_gen_history, loss_gen_history, loss_disc_history, smoothed_loss_disc_history, overtrain_save_epoch
+ os.environ["MASTER_ADDR"] = "localhost"
+ os.environ["MASTER_PORT"] = str(randint(20000, 55555))
+
+ def start():
+ """
+ Starts the training process with multi-GPU support.
+ """
+ children = []
+ pid_data = {"process_pids": []}
+ with open(config_save_path, "r") as pid_file:
+ try:
+ existing_data = json.load(pid_file)
+ pid_data.update(existing_data)
+ except json.JSONDecodeError:
+ pass
+ with open(config_save_path, "w") as pid_file:
+ for i in range(n_gpus):
+ subproc = mp.Process(
+ target=run,
+ args=(
+ i,
+ n_gpus,
+ experiment_dir,
+ pretrainG,
+ pretrainD,
+ pitch_guidance,
+ custom_total_epoch,
+ custom_save_every_weights,
+ config,
+ ),
+ )
+ children.append(subproc)
+ subproc.start()
+ pid_data["process_pids"].append(subproc.pid)
+ json.dump(pid_data, pid_file, indent=4)
+
+ for i in range(n_gpus):
+ children[i].join()
+
+ def load_from_json(file_path):
+ """
+ Load data from a JSON file.
+
+ Args:
+ file_path (str): The path to the JSON file.
+ """
+ if os.path.exists(file_path):
+ with open(file_path, "r") as f:
+ data = json.load(f)
+ return (
+ data.get("loss_disc_history", []),
+ data.get("smoothed_loss_disc_history", []),
+ data.get("loss_gen_history", []),
+ data.get("smoothed_loss_gen_history", []),
+ )
+ return [], [], [], []
+
+ def continue_overtrain_detector(training_file_path):
+ """
+ Continues the overtrain detector by loading the training history from a JSON file.
+
+ Args:
+ training_file_path (str): The file path of the JSON file containing the training history.
+ """
+ if overtraining_detector:
+ if os.path.exists(training_file_path):
+ (
+ loss_disc_history,
+ smoothed_loss_disc_history,
+ loss_gen_history,
+ smoothed_loss_gen_history,
+ ) = load_from_json(training_file_path)
+
+ n_gpus = torch.cuda.device_count()
+
+    if not torch.cuda.is_available() and torch.backends.mps.is_available():
+        n_gpus = 1
+ if n_gpus < 1:
+ print("GPU not detected, reverting to CPU (not recommended)")
+ n_gpus = 1
+
+ if sync_graph == True:
+ print(
+ "Sync graph is now activated! With sync graph enabled, the model undergoes a single epoch of training. Once the graphs are synchronized, training proceeds for the previously specified number of epochs."
+ )
+ custom_total_epoch = 1
+ custom_save_every_weights = True
+ start()
+
+ # Synchronize graphs by modifying config files
+ model_config_file = os.path.join(experiment_dir, "config.json")
+ rvc_config_file = os.path.join(
+ now_dir, "rvc", "configs", version, str(sample_rate) + ".json"
+ )
+ if not os.path.exists(rvc_config_file):
+ rvc_config_file = os.path.join(
+ now_dir, "rvc", "configs", "v1", str(sample_rate) + ".json"
+ )
+
+ pattern = rf"{os.path.basename(model_name)}_(\d+)e_(\d+)s\.pth"
+
+ for filename in os.listdir(experiment_dir):
+ match = re.match(pattern, filename)
+ if match:
+ steps = int(match.group(2))
+
+ def edit_config(config_file):
+ """
+ Edits the config file to synchronize graphs.
+
+ Args:
+ config_file (str): Path to the config file.
+ """
+ with open(config_file, "r", encoding="utf8") as json_file:
+ config_data = json.load(json_file)
+
+ config_data["train"]["log_interval"] = steps
+
+ with open(config_file, "w", encoding="utf8") as json_file:
+ json.dump(
+ config_data,
+ json_file,
+ indent=2,
+ separators=(",", ": "),
+ ensure_ascii=False,
+ )
+
+ edit_config(model_config_file)
+ edit_config(rvc_config_file)
+
+ # Clean up unnecessary files
+ for root, dirs, files in os.walk(
+ os.path.join(now_dir, "logs", model_name), topdown=False
+ ):
+ for name in files:
+ file_path = os.path.join(root, name)
+ file_name, file_extension = os.path.splitext(name)
+ if file_extension == ".0":
+ os.remove(file_path)
+ elif ("D" in name or "G" in name) and file_extension == ".pth":
+ os.remove(file_path)
+ elif (
+ "added" in name or "trained" in name
+ ) and file_extension == ".index":
+ os.remove(file_path)
+ for name in dirs:
+ if name == "eval":
+ folder_path = os.path.join(root, name)
+ for item in os.listdir(folder_path):
+ item_path = os.path.join(folder_path, item)
+ if os.path.isfile(item_path):
+ os.remove(item_path)
+ os.rmdir(folder_path)
+
+ print("Successfully synchronized graphs!")
+ custom_total_epoch = total_epoch
+ custom_save_every_weights = save_every_weights
+ continue_overtrain_detector(training_file_path)
+ start()
+ else:
+ custom_total_epoch = total_epoch
+ custom_save_every_weights = save_every_weights
+ continue_overtrain_detector(training_file_path)
+ start()
+
+
+def run(
+ rank,
+ n_gpus,
+ experiment_dir,
+ pretrainG,
+ pretrainD,
+ pitch_guidance,
+ custom_total_epoch,
+ custom_save_every_weights,
+ config,
+):
+ """
+ Runs the training loop on a specific GPU.
+
+ Args:
+ rank (int): Rank of the current GPU.
+ n_gpus (int): Total number of GPUs.
+ """
+ global global_step
+
+ if rank == 0:
+ writer = SummaryWriter(log_dir=experiment_dir)
+ writer_eval = SummaryWriter(log_dir=os.path.join(experiment_dir, "eval"))
+
+ dist.init_process_group(
+ backend="gloo", init_method="env://", world_size=n_gpus, rank=rank
+ )
+ torch.manual_seed(config.train.seed)
+ if torch.cuda.is_available():
+ torch.cuda.set_device(rank)
+
+ # Zluda
+ if torch.cuda.is_available() and torch.cuda.get_device_name().endswith("[ZLUDA]"):
+        print("Disabling cuDNN for training with ZLUDA")
+ torch.backends.cudnn.enabled = False
+ torch.backends.cuda.enable_flash_sdp(False)
+ torch.backends.cuda.enable_math_sdp(True)
+ torch.backends.cuda.enable_mem_efficient_sdp(False)
+
+ # Create datasets and dataloaders
+ if pitch_guidance == True:
+ train_dataset = TextAudioLoaderMultiNSFsid(config.data)
+ elif pitch_guidance == False:
+ train_dataset = TextAudioLoader(config.data)
+ else:
+ raise ValueError(f"Unexpected value for pitch_guidance: {pitch_guidance}")
+
+ train_sampler = DistributedBucketSampler(
+ train_dataset,
+ batch_size * n_gpus,
+ [100, 200, 300, 400, 500, 600, 700, 800, 900],
+ num_replicas=n_gpus,
+ rank=rank,
+ shuffle=True,
+ )
+
+ if pitch_guidance == True:
+ collate_fn = TextAudioCollateMultiNSFsid()
+ elif pitch_guidance == False:
+ collate_fn = TextAudioCollate()
+
+ train_loader = DataLoader(
+ train_dataset,
+ num_workers=4,
+ shuffle=False,
+ pin_memory=True,
+ collate_fn=collate_fn,
+ batch_sampler=train_sampler,
+ persistent_workers=True,
+ prefetch_factor=8,
+ )
+
+ # Initialize models and optimizers
+ net_g = Synthesizer(
+ config.data.filter_length // 2 + 1,
+ config.train.segment_size // config.data.hop_length,
+ **config.model,
+ use_f0=pitch_guidance == True,
+ is_half=config.train.fp16_run,
+ sr=sample_rate,
+ )
+ if torch.cuda.is_available():
+ net_g = net_g.cuda(rank)
+ if version == "v1":
+ net_d = MultiPeriodDiscriminator(config.model.use_spectral_norm)
+ else:
+ net_d = MultiPeriodDiscriminatorV2(config.model.use_spectral_norm)
+ if torch.cuda.is_available():
+ net_d = net_d.cuda(rank)
+ optim_g = torch.optim.AdamW(
+ net_g.parameters(),
+ config.train.learning_rate,
+ betas=config.train.betas,
+ eps=config.train.eps,
+ )
+ optim_d = torch.optim.AdamW(
+ net_d.parameters(),
+ config.train.learning_rate,
+ betas=config.train.betas,
+ eps=config.train.eps,
+ )
+
+ # Wrap models with DDP
+ if torch.cuda.is_available():
+ net_g = DDP(net_g, device_ids=[rank])
+ net_d = DDP(net_d, device_ids=[rank])
+ else:
+ net_g = DDP(net_g)
+ net_d = DDP(net_d)
+
+ # Load checkpoint if available
+ try:
+ print("Starting training...")
+ _, _, _, epoch_str = load_checkpoint(
+ latest_checkpoint_path(experiment_dir, "D_*.pth"), net_d, optim_d
+ )
+ _, _, _, epoch_str = load_checkpoint(
+ latest_checkpoint_path(experiment_dir, "G_*.pth"), net_g, optim_g
+ )
+ epoch_str += 1
+ global_step = (epoch_str - 1) * len(train_loader)
+
+    except Exception:
+ epoch_str = 1
+ global_step = 0
+ if pretrainG != "":
+ if rank == 0:
+ print(f"Loaded pretrained (G) '{pretrainG}'")
+ if hasattr(net_g, "module"):
+ net_g.module.load_state_dict(
+ torch.load(pretrainG, map_location="cpu")["model"]
+ )
+
+ else:
+ net_g.load_state_dict(
+ torch.load(pretrainG, map_location="cpu")["model"]
+ )
+
+ if pretrainD != "":
+ if rank == 0:
+ print(f"Loaded pretrained (D) '{pretrainD}'")
+ if hasattr(net_d, "module"):
+ net_d.module.load_state_dict(
+ torch.load(pretrainD, map_location="cpu")["model"]
+ )
+
+ else:
+ net_d.load_state_dict(
+ torch.load(pretrainD, map_location="cpu")["model"]
+ )
+
+ # Initialize schedulers and scaler
+ scheduler_g = torch.optim.lr_scheduler.ExponentialLR(
+ optim_g, gamma=config.train.lr_decay, last_epoch=epoch_str - 2
+ )
+ scheduler_d = torch.optim.lr_scheduler.ExponentialLR(
+ optim_d, gamma=config.train.lr_decay, last_epoch=epoch_str - 2
+ )
+
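+    # Dummy optimizer steps so the schedulers' first step() call does not
+    # warn about running before optimizer.step().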
+ optim_d.step()
+ optim_g.step()
+
+ scaler = GradScaler(enabled=config.train.fp16_run)
+
+ cache = []
+ for epoch in range(epoch_str, total_epoch + 1):
+        train_and_evaluate(
+            rank,
+            epoch,
+            config,
+            [net_g, net_d],
+            [optim_g, optim_d],
+            scaler,
+            [train_loader, None],
+            [writer, writer_eval] if rank == 0 else None,
+            cache,
+            custom_save_every_weights,
+            custom_total_epoch,
+        )
+ scheduler_g.step()
+ scheduler_d.step()
+
+
+def train_and_evaluate(
+ rank,
+ epoch,
+ hps,
+ nets,
+ optims,
+ scaler,
+ loaders,
+ writers,
+ cache,
+ custom_save_every_weights,
+ custom_total_epoch,
+):
+ """
+ Trains and evaluates the model for one epoch.
+
+ Args:
+ rank (int): Rank of the current GPU.
+ epoch (int): Current epoch number.
+ hps (Namespace): Hyperparameters.
+ nets (list): List of models [net_g, net_d].
+ optims (list): List of optimizers [optim_g, optim_d].
+ scaler (GradScaler): Gradient scaler for mixed precision training.
+ loaders (list): List of dataloaders [train_loader, eval_loader].
+ writers (list): List of TensorBoard writers [writer, writer_eval].
+ cache (list): List to cache data in GPU memory.
+ """
+    global global_step, lowest_value, loss_disc, consecutive_increases_gen, consecutive_increases_disc, overtrain_info
+
+ if epoch == 1:
+ lowest_value = {"step": 0, "value": float("inf"), "epoch": 0}
+ last_loss_gen_all = 0.0
+ consecutive_increases_gen = 0
+ consecutive_increases_disc = 0
+
+ net_g, net_d = nets
+ optim_g, optim_d = optims
+ train_loader = loaders[0] if loaders is not None else None
+ if writers is not None:
+ writer = writers[0]
+
+ train_loader.batch_sampler.set_epoch(epoch)
+
+ net_g.train()
+ net_d.train()
+
+ # Data caching
+ if cache_data_in_gpu:
+ data_iterator = cache
+ if cache == []:
+ for batch_idx, info in enumerate(train_loader):
+ if pitch_guidance == True:
+ (
+ phone,
+ phone_lengths,
+ pitch,
+ pitchf,
+ spec,
+ spec_lengths,
+ wave,
+ wave_lengths,
+ sid,
+ ) = info
+ elif pitch_guidance == False:
+ (
+ phone,
+ phone_lengths,
+ spec,
+ spec_lengths,
+ wave,
+ wave_lengths,
+ sid,
+ ) = info
+ if torch.cuda.is_available():
+ phone = phone.cuda(rank, non_blocking=True)
+ phone_lengths = phone_lengths.cuda(rank, non_blocking=True)
+ if pitch_guidance == True:
+ pitch = pitch.cuda(rank, non_blocking=True)
+ pitchf = pitchf.cuda(rank, non_blocking=True)
+ sid = sid.cuda(rank, non_blocking=True)
+ spec = spec.cuda(rank, non_blocking=True)
+ spec_lengths = spec_lengths.cuda(rank, non_blocking=True)
+ wave = wave.cuda(rank, non_blocking=True)
+ wave_lengths = wave_lengths.cuda(rank, non_blocking=True)
+ if pitch_guidance == True:
+ cache.append(
+ (
+ batch_idx,
+ (
+ phone,
+ phone_lengths,
+ pitch,
+ pitchf,
+ spec,
+ spec_lengths,
+ wave,
+ wave_lengths,
+ sid,
+ ),
+ )
+ )
+ elif pitch_guidance == False:
+ cache.append(
+ (
+ batch_idx,
+ (
+ phone,
+ phone_lengths,
+ spec,
+ spec_lengths,
+ wave,
+ wave_lengths,
+ sid,
+ ),
+ )
+ )
+ else:
+ shuffle(cache)
+ else:
+ data_iterator = enumerate(train_loader)
+
+ epoch_recorder = EpochRecorder()
+ with tqdm(total=len(train_loader), leave=False) as pbar:
+ for batch_idx, info in data_iterator:
+ if pitch_guidance == True:
+ (
+ phone,
+ phone_lengths,
+ pitch,
+ pitchf,
+ spec,
+ spec_lengths,
+ wave,
+ wave_lengths,
+ sid,
+ ) = info
+ elif pitch_guidance == False:
+ phone, phone_lengths, spec, spec_lengths, wave, wave_lengths, sid = info
+            if not cache_data_in_gpu and torch.cuda.is_available():
+ phone = phone.cuda(rank, non_blocking=True)
+ phone_lengths = phone_lengths.cuda(rank, non_blocking=True)
+ if pitch_guidance == True:
+ pitch = pitch.cuda(rank, non_blocking=True)
+ pitchf = pitchf.cuda(rank, non_blocking=True)
+ sid = sid.cuda(rank, non_blocking=True)
+ spec = spec.cuda(rank, non_blocking=True)
+ spec_lengths = spec_lengths.cuda(rank, non_blocking=True)
+ wave = wave.cuda(rank, non_blocking=True)
+ wave_lengths = wave_lengths.cuda(rank, non_blocking=True)
+
+ # Forward pass
+ with autocast(enabled=config.train.fp16_run):
+ if pitch_guidance == True:
+ (
+ y_hat,
+ ids_slice,
+ x_mask,
+ z_mask,
+ (z, z_p, m_p, logs_p, m_q, logs_q),
+ ) = net_g(
+ phone, phone_lengths, pitch, pitchf, spec, spec_lengths, sid
+ )
+ elif pitch_guidance == False:
+ (
+ y_hat,
+ ids_slice,
+ x_mask,
+ z_mask,
+ (z, z_p, m_p, logs_p, m_q, logs_q),
+ ) = net_g(phone, phone_lengths, spec, spec_lengths, sid)
+ mel = spec_to_mel_torch(
+ spec,
+ config.data.filter_length,
+ config.data.n_mel_channels,
+ config.data.sample_rate,
+ config.data.mel_fmin,
+ config.data.mel_fmax,
+ )
+ y_mel = commons.slice_segments(
+ mel,
+ ids_slice,
+ config.train.segment_size // config.data.hop_length,
+ dim=3,
+ )
+ with autocast(enabled=False):
+ y_hat_mel = mel_spectrogram_torch(
+ y_hat.float().squeeze(1),
+ config.data.filter_length,
+ config.data.n_mel_channels,
+ config.data.sample_rate,
+ config.data.hop_length,
+ config.data.win_length,
+ config.data.mel_fmin,
+ config.data.mel_fmax,
+ )
+ if config.train.fp16_run == True:
+ y_hat_mel = y_hat_mel.half()
+ wave = commons.slice_segments(
+ wave,
+ ids_slice * config.data.hop_length,
+ config.train.segment_size,
+ dim=3,
+ )
+
+ y_d_hat_r, y_d_hat_g, _, _ = net_d(wave, y_hat.detach())
+ with autocast(enabled=False):
+ loss_disc, losses_disc_r, losses_disc_g = discriminator_loss(
+ y_d_hat_r, y_d_hat_g
+ )
+
+ # Discriminator backward and update
+ optim_d.zero_grad()
+ scaler.scale(loss_disc).backward()
+ scaler.unscale_(optim_d)
+ grad_norm_d = commons.clip_grad_value(net_d.parameters(), None)
+ scaler.step(optim_d)
+
+ # Generator backward and update
+ with autocast(enabled=config.train.fp16_run):
+ y_d_hat_r, y_d_hat_g, fmap_r, fmap_g = net_d(wave, y_hat)
+ with autocast(enabled=False):
+ loss_mel = F.l1_loss(y_mel, y_hat_mel) * config.train.c_mel
+ loss_kl = (
+ kl_loss(z_p, logs_q, m_p, logs_p, z_mask) * config.train.c_kl
+ )
+ loss_fm = feature_loss(fmap_r, fmap_g)
+ loss_gen, losses_gen = generator_loss(y_d_hat_g)
+ loss_gen_all = loss_gen + loss_fm + loss_mel + loss_kl
+
+ if loss_gen_all < lowest_value["value"]:
+ lowest_value["value"] = loss_gen_all
+ lowest_value["step"] = global_step
+ lowest_value["epoch"] = epoch
+ # print(f'Lowest generator loss updated: {lowest_value["value"]} at epoch {epoch}, step {global_step}')
+                if epoch > lowest_value["epoch"]:
+                    print(
+                        "Alert: the lowest generator loss so far was recorded in an earlier epoch."
+                    )
+
+ optim_g.zero_grad()
+ scaler.scale(loss_gen_all).backward()
+ scaler.unscale_(optim_g)
+ grad_norm_g = commons.clip_grad_value(net_g.parameters(), None)
+ scaler.step(optim_g)
+ scaler.update()
+
+ # Logging and checkpointing
+ if rank == 0:
+ if global_step % config.train.log_interval == 0:
+ lr = optim_g.param_groups[0]["lr"]
+
+ if loss_mel > 75:
+ loss_mel = 75
+ if loss_kl > 9:
+ loss_kl = 9
+
+ scalar_dict = {
+ "loss/g/total": loss_gen_all,
+ "loss/d/total": loss_disc,
+ "learning_rate": lr,
+ "grad_norm_d": grad_norm_d,
+ "grad_norm_g": grad_norm_g,
+ }
+ scalar_dict.update(
+ {
+ "loss/g/fm": loss_fm,
+ "loss/g/mel": loss_mel,
+ "loss/g/kl": loss_kl,
+ }
+ )
+ scalar_dict.update(
+ {f"loss/g/{i}": v for i, v in enumerate(losses_gen)}
+ )
+ scalar_dict.update(
+ {f"loss/d_r/{i}": v for i, v in enumerate(losses_disc_r)}
+ )
+ scalar_dict.update(
+ {f"loss/d_g/{i}": v for i, v in enumerate(losses_disc_g)}
+ )
+ image_dict = {
+ "slice/mel_org": plot_spectrogram_to_numpy(
+ y_mel[0].data.cpu().numpy()
+ ),
+ "slice/mel_gen": plot_spectrogram_to_numpy(
+ y_hat_mel[0].data.cpu().numpy()
+ ),
+ "all/mel": plot_spectrogram_to_numpy(mel[0].data.cpu().numpy()),
+ }
+ summarize(
+ writer=writer,
+ global_step=global_step,
+ images=image_dict,
+ scalars=scalar_dict,
+ )
+
+ global_step += 1
+ pbar.update(1)
+
+ # Save checkpoint
+    if epoch % save_every_epoch == 0 and rank == 0:
+        checkpoint_suffix = (
+            f"{global_step if not save_only_latest else 2333333}.pth"
+        )
+ save_checkpoint(
+ net_g,
+ optim_g,
+ config.train.learning_rate,
+ epoch,
+ os.path.join(experiment_dir, "G_" + checkpoint_suffix),
+ )
+ save_checkpoint(
+ net_d,
+ optim_d,
+ config.train.learning_rate,
+ epoch,
+ os.path.join(experiment_dir, "D_" + checkpoint_suffix),
+ )
+    if rank == 0 and custom_save_every_weights:
+ if hasattr(net_g, "module"):
+ ckpt = net_g.module.state_dict()
+ else:
+ ckpt = net_g.state_dict()
+        if not overtraining_detector:
+ overtrain_info = None
+ extract_model(
+ ckpt=ckpt,
+ sr=sample_rate,
+            pitch_guidance=bool(pitch_guidance),
+ name=model_name,
+ model_dir=os.path.join(
+ experiment_dir,
+ f"{model_name}_{epoch}e_{global_step}s.pth",
+ ),
+ epoch=epoch,
+ step=global_step,
+ version=version,
+ hps=hps,
+ overtrain_info=overtrain_info,
+ )
+
+ def check_overtraining(smoothed_loss_history, threshold, epsilon=0.004):
+ """
+ Checks for overtraining based on the smoothed loss history.
+
+ Args:
+ smoothed_loss_history (list): List of smoothed losses for each epoch.
+ threshold (int): Number of consecutive epochs with insignificant changes or increases to consider overtraining.
+ epsilon (float): The maximum change considered insignificant.
+ """
+ if len(smoothed_loss_history) < threshold + 1:
+ return False
+
+ for i in range(-threshold, -1):
+ if smoothed_loss_history[i + 1] > smoothed_loss_history[i]:
+ return True
+ if abs(smoothed_loss_history[i + 1] - smoothed_loss_history[i]) >= epsilon:
+ return False
+
+ return True
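+    # check_overtraining example (illustrative): with threshold=3 and
+    # epsilon=0.004, [0.520, 0.519, 0.518, 0.5185] returns True (the last step
+    # rises) while [0.520, 0.510, 0.500, 0.490] returns False (still falling).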
+
+ def update_exponential_moving_average(
+ smoothed_loss_history, new_value, smoothing=0.987
+ ):
+ """
+ Updates the exponential moving average with a new value.
+
+ Args:
+ smoothed_loss_history (list): List of smoothed values.
+ new_value (float): New value to be added.
+ smoothing (float): Smoothing factor.
+ """
+ if not smoothed_loss_history:
+ smoothed_value = new_value
+ else:
+ smoothed_value = (
+ smoothing * smoothed_loss_history[-1] + (1 - smoothing) * new_value
+ )
+ smoothed_loss_history.append(smoothed_value)
+ return smoothed_value
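+    # EMA example (illustrative): with smoothing=0.987, a history ending in
+    # 2.000 updated with new_value=1.000 appends
+    # 0.987 * 2.000 + 0.013 * 1.000 = 1.987, so single-epoch spikes barely
+    # move the curve and the overtraining check reacts only to sustained trends.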
+
+ def save_to_json(
+ file_path,
+ loss_disc_history,
+ smoothed_loss_disc_history,
+ loss_gen_history,
+ smoothed_loss_gen_history,
+ ):
+ """
+ Save the training history to a JSON file.
+ """
+ data = {
+ "loss_disc_history": loss_disc_history,
+ "smoothed_loss_disc_history": smoothed_loss_disc_history,
+ "loss_gen_history": loss_gen_history,
+ "smoothed_loss_gen_history": smoothed_loss_gen_history,
+ }
+ with open(file_path, "w") as f:
+ json.dump(data, f)
+
+ if overtraining_detector and rank == 0 and epoch > 1:
+ # Add the current loss to the history
+ current_loss_disc = float(loss_disc)
+ loss_disc_history.append(current_loss_disc)
+
+ # Update smoothed loss history with loss_disc
+ smoothed_value_disc = update_exponential_moving_average(
+ smoothed_loss_disc_history, current_loss_disc
+ )
+
+ # Check overtraining with smoothed loss_disc
+ is_overtraining_disc = check_overtraining(
+ smoothed_loss_disc_history, overtraining_threshold * 2
+ )
+ if is_overtraining_disc:
+ consecutive_increases_disc += 1
+ else:
+ consecutive_increases_disc = 0
+ # Add the current loss_gen to the history
+ current_loss_gen = float(lowest_value["value"])
+ loss_gen_history.append(current_loss_gen)
+
+ # Update the smoothed loss_gen history
+ smoothed_value_gen = update_exponential_moving_average(
+ smoothed_loss_gen_history, current_loss_gen
+ )
+
+ # Check for overtraining with the smoothed loss_gen
+ is_overtraining_gen = check_overtraining(
+ smoothed_loss_gen_history, overtraining_threshold, 0.01
+ )
+ if is_overtraining_gen:
+ consecutive_increases_gen += 1
+ else:
+ consecutive_increases_gen = 0
+
+ overtrain_info = f"Smoothed loss_g {smoothed_value_gen:.3f} and loss_d {smoothed_value_disc:.3f}"
+ # Save the data in the JSON file if the epoch is divisible by save_every_epoch
+ if epoch % save_every_epoch == 0:
+ save_to_json(
+ training_file_path,
+ loss_disc_history,
+ smoothed_loss_disc_history,
+ loss_gen_history,
+ smoothed_loss_gen_history,
+ )
+
+        if (
+            is_overtraining_gen
+            and consecutive_increases_gen == overtraining_threshold
+        ) or (
+            is_overtraining_disc
+            and consecutive_increases_disc == overtraining_threshold * 2
+        ):
+ print(
+ f"Overtraining detected at epoch {epoch} with smoothed loss_g {smoothed_value_gen:.3f} and loss_d {smoothed_value_disc:.3f}"
+ )
+ os._exit(2333333)
+ else:
+ print(
+ f"New best epoch {epoch} with smoothed loss_g {smoothed_value_gen:.3f} and loss_d {smoothed_value_disc:.3f}"
+ )
+ old_model_files = glob.glob(
+ os.path.join(experiment_dir, f"{model_name}_*e_*s_best_epoch.pth")
+ )
+ for file in old_model_files:
+ os.remove(file)
+
+ if hasattr(net_g, "module"):
+ ckpt = net_g.module.state_dict()
+ else:
+ ckpt = net_g.state_dict()
+            if not overtraining_detector:
+ overtrain_info = None
+ extract_model(
+ ckpt=ckpt,
+ sr=sample_rate,
+                pitch_guidance=bool(pitch_guidance),
+ name=model_name,
+ model_dir=os.path.join(
+ experiment_dir,
+ f"{model_name}_{epoch}e_{global_step}s_best_epoch.pth",
+ ),
+ epoch=epoch,
+ step=global_step,
+ version=version,
+ hps=hps,
+ overtrain_info=overtrain_info,
+ )
+
+ # Print training progress
+ if rank == 0:
+ lowest_value_rounded = float(lowest_value["value"])
+ lowest_value_rounded = round(lowest_value_rounded, 3)
+
+        if epoch > 1 and overtraining_detector:
+ remaining_epochs_gen = overtraining_threshold - consecutive_increases_gen
+ remaining_epochs_disc = (
+ overtraining_threshold * 2
+ ) - consecutive_increases_disc
+ print(
+ f"{model_name} | epoch={epoch} | step={global_step} | {epoch_recorder.record()} | lowest_value={lowest_value_rounded} (epoch {lowest_value['epoch']} and step {lowest_value['step']}) | Number of epochs remaining for overtraining: g/total: {remaining_epochs_gen} d/total: {remaining_epochs_disc} | smoothed_loss_gen={smoothed_value_gen:.3f} | smoothed_loss_disc={smoothed_value_disc:.3f}"
+ )
+        elif epoch > 1 and not overtraining_detector:
+ print(
+ f"{model_name} | epoch={epoch} | step={global_step} | {epoch_recorder.record()} | lowest_value={lowest_value_rounded} (epoch {lowest_value['epoch']} and step {lowest_value['step']})"
+ )
+ else:
+ print(
+ f"{model_name} | epoch={epoch} | step={global_step} | {epoch_recorder.record()}"
+ )
+ last_loss_gen_all = loss_gen_all
+
+ # Save the final model
+ if epoch >= custom_total_epoch and rank == 0:
+ lowest_value_rounded = float(lowest_value["value"])
+ lowest_value_rounded = round(lowest_value_rounded, 3)
+ print(
+        f"Training completed successfully after {epoch} epochs and {global_step} steps, with a final generator loss of {round(loss_gen_all.item(), 3)}."
+ )
+ print(
+ f"Lowest generator loss: {lowest_value_rounded} at epoch {lowest_value['epoch']}, step {lowest_value['step']}"
+ )
+
+        config_file_path = os.path.join(experiment_dir, "config.json")
+        with open(config_file_path, "r") as config_file:
+            config_data = json.load(config_file)
+        with open(config_file_path, "w") as config_file:
+            config_data.pop("process_pids", None)
+            json.dump(config_data, config_file, indent=4)
+
+ if not os.path.exists(
+ os.path.join(experiment_dir, f"{model_name}_{epoch}e_{global_step}s.pth")
+ ):
+ if hasattr(net_g, "module"):
+ ckpt = net_g.module.state_dict()
+ else:
+ ckpt = net_g.state_dict()
+            if not overtraining_detector:
+ overtrain_info = None
+ extract_model(
+ ckpt=ckpt,
+ sr=sample_rate,
+                pitch_guidance=bool(pitch_guidance),
+ name=model_name,
+ model_dir=os.path.join(
+ experiment_dir,
+ f"{model_name}_{epoch}e_{global_step}s.pth",
+ ),
+ epoch=epoch,
+ step=global_step,
+ version=version,
+ hps=hps,
+ overtrain_info=overtrain_info,
+ )
+ sleep(1)
+ os._exit(2333333)
+
+
+if __name__ == "__main__":
+ torch.multiprocessing.set_start_method("spawn")
+ main()
diff --git a/rvc/train/utils.py b/rvc/train/utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..cd4051630a5e8597156a67a42ff6ab9576504bdc
--- /dev/null
+++ b/rvc/train/utils.py
@@ -0,0 +1,248 @@
+import os
+import glob
+import torch
+import numpy as np
+from scipy.io.wavfile import read
+from collections import OrderedDict
+import matplotlib.pyplot as plt
+
+MATPLOTLIB_FLAG = False
+
+
+def replace_keys_in_dict(d, old_key_part, new_key_part):
+ """
+ Recursively replace parts of the keys in a dictionary.
+
+ Args:
+ d (dict or OrderedDict): The dictionary to update.
+ old_key_part (str): The part of the key to replace.
+ new_key_part (str): The new part of the key.
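+
+    Example (illustrative):
+        replace_keys_in_dict({"enc.weight_g": w}, ".weight_g", ".weight")
+        returns {"enc.weight": w}; nested dicts are handled recursively.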
+ """
+ updated_dict = OrderedDict() if isinstance(d, OrderedDict) else {}
+ for key, value in d.items():
+ new_key = (
+ key.replace(old_key_part, new_key_part) if isinstance(key, str) else key
+ )
+ updated_dict[new_key] = (
+ replace_keys_in_dict(value, old_key_part, new_key_part)
+ if isinstance(value, dict)
+ else value
+ )
+ return updated_dict
+
+
+def load_checkpoint(checkpoint_path, model, optimizer=None, load_opt=1):
+ """
+ Load a checkpoint into a model and optionally the optimizer.
+
+ Args:
+ checkpoint_path (str): Path to the checkpoint file.
+ model (torch.nn.Module): The model to load the checkpoint into.
+ optimizer (torch.optim.Optimizer, optional): The optimizer to load the state from. Defaults to None.
+ load_opt (int, optional): Whether to load the optimizer state. Defaults to 1.
+ """
+ assert os.path.isfile(
+ checkpoint_path
+ ), f"Checkpoint file not found: {checkpoint_path}"
+
+ checkpoint_dict = torch.load(checkpoint_path, map_location="cpu")
+ checkpoint_dict = replace_keys_in_dict(
+ replace_keys_in_dict(
+ checkpoint_dict, ".weight_v", ".parametrizations.weight.original1"
+ ),
+ ".weight_g",
+ ".parametrizations.weight.original0",
+ )
+
+ # Update model state_dict
+ model_state_dict = (
+ model.module.state_dict() if hasattr(model, "module") else model.state_dict()
+ )
+ new_state_dict = {
+ k: checkpoint_dict["model"].get(k, v) for k, v in model_state_dict.items()
+ }
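+    # Keys absent from the checkpoint fall back to the model's current values,
+    # so close-but-not-identical checkpoints still load without KeyErrors.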
+
+ # Load state_dict into model
+ if hasattr(model, "module"):
+ model.module.load_state_dict(new_state_dict, strict=False)
+ else:
+ model.load_state_dict(new_state_dict, strict=False)
+
+ if optimizer and load_opt == 1:
+ optimizer.load_state_dict(checkpoint_dict.get("optimizer", {}))
+
+ print(
+ f"Loaded checkpoint '{checkpoint_path}' (epoch {checkpoint_dict['iteration']})"
+ )
+ return (
+ model,
+ optimizer,
+ checkpoint_dict.get("learning_rate", 0),
+ checkpoint_dict["iteration"],
+ )
+
+
+def save_checkpoint(model, optimizer, learning_rate, iteration, checkpoint_path):
+ """
+ Save the model and optimizer state to a checkpoint file.
+
+ Args:
+ model (torch.nn.Module): The model to save.
+ optimizer (torch.optim.Optimizer): The optimizer to save the state of.
+ learning_rate (float): The current learning rate.
+ iteration (int): The current iteration.
+ checkpoint_path (str): The path to save the checkpoint to.
+ """
+ state_dict = (
+ model.module.state_dict() if hasattr(model, "module") else model.state_dict()
+ )
+ checkpoint_data = {
+ "model": state_dict,
+ "iteration": iteration,
+ "optimizer": optimizer.state_dict(),
+ "learning_rate": learning_rate,
+ }
+ torch.save(checkpoint_data, checkpoint_path)
+
+ # Create a backwards-compatible checkpoint
+ old_version_path = checkpoint_path.replace(".pth", "_old_version.pth")
+ checkpoint_data = replace_keys_in_dict(
+ replace_keys_in_dict(
+ checkpoint_data, ".parametrizations.weight.original1", ".weight_v"
+ ),
+ ".parametrizations.weight.original0",
+ ".weight_g",
+ )
+ torch.save(checkpoint_data, old_version_path)
+
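+    # The converted file replaces the fresh save, so the checkpoint kept on
+    # disk always carries the backwards-compatible key names.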
+ os.replace(old_version_path, checkpoint_path)
+ print(f"Saved model '{checkpoint_path}' (epoch {iteration})")
+
+
+def summarize(
+ writer,
+ global_step,
+ scalars={},
+ histograms={},
+ images={},
+ audios={},
+ audio_sample_rate=22050,
+):
+ """
+ Log various summaries to a TensorBoard writer.
+
+ Args:
+ writer (SummaryWriter): The TensorBoard writer.
+ global_step (int): The current global step.
+ scalars (dict, optional): Dictionary of scalar values to log.
+ histograms (dict, optional): Dictionary of histogram values to log.
+ images (dict, optional): Dictionary of image values to log.
+ audios (dict, optional): Dictionary of audio values to log.
+ audio_sample_rate (int, optional): Sampling rate of the audio data.
+ """
+ for k, v in scalars.items():
+ writer.add_scalar(k, v, global_step)
+ for k, v in histograms.items():
+ writer.add_histogram(k, v, global_step)
+ for k, v in images.items():
+ writer.add_image(k, v, global_step, dataformats="HWC")
+ for k, v in audios.items():
+ writer.add_audio(k, v, global_step, audio_sample_rate)
+
+
+def latest_checkpoint_path(dir_path, regex="G_*.pth"):
+ """
+ Get the latest checkpoint file in a directory.
+
+ Args:
+ dir_path (str): The directory to search for checkpoints.
+        regex (str, optional): The glob pattern used to match checkpoint files.
+ """
+ checkpoints = sorted(
+ glob.glob(os.path.join(dir_path, regex)),
+ key=lambda f: int("".join(filter(str.isdigit, f))),
+ )
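+    # The sort key concatenates every digit in the full path, so keep each
+    # run's checkpoints in their own directory for a sensible ordering.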
+ return checkpoints[-1] if checkpoints else None
+
+
+def plot_spectrogram_to_numpy(spectrogram):
+ """
+ Convert a spectrogram to a NumPy array for visualization.
+
+ Args:
+ spectrogram (numpy.ndarray): The spectrogram to plot.
+ """
+ global MATPLOTLIB_FLAG
+ if not MATPLOTLIB_FLAG:
+ plt.switch_backend("Agg")
+ MATPLOTLIB_FLAG = True
+
+ fig, ax = plt.subplots(figsize=(10, 2))
+ im = ax.imshow(spectrogram, aspect="auto", origin="lower", interpolation="none")
+ plt.colorbar(im, ax=ax)
+ plt.xlabel("Frames")
+ plt.ylabel("Channels")
+ plt.tight_layout()
+
+ fig.canvas.draw()
+ data = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)
+ data = data.reshape(fig.canvas.get_width_height()[::-1] + (3,))
+ plt.close(fig)
+ return data
+
+
+def load_wav_to_torch(full_path):
+ """
+ Load a WAV file into a PyTorch tensor.
+
+ Args:
+ full_path (str): The path to the WAV file.
+ """
+ sample_rate, data = read(full_path)
+ return torch.FloatTensor(data.astype(np.float32)), sample_rate
+
+
+def load_filepaths_and_text(filename, split="|"):
+ """
+ Load filepaths and associated text from a file.
+
+ Args:
+ filename (str): The path to the file.
+ split (str, optional): The delimiter used to split the lines.
+ """
+ with open(filename, encoding="utf-8") as f:
+ return [line.strip().split(split) for line in f]
+
+
+class HParams:
+ """
+ A class for storing and accessing hyperparameters.
+ """
+
+ def __init__(self, **kwargs):
+ for k, v in kwargs.items():
+ self[k] = HParams(**v) if isinstance(v, dict) else v
+
+ def keys(self):
+ return self.__dict__.keys()
+
+ def items(self):
+ return self.__dict__.items()
+
+ def values(self):
+ return self.__dict__.values()
+
+ def __len__(self):
+ return len(self.__dict__)
+
+ def __getitem__(self, key):
+ return self.__dict__[key]
+
+ def __setitem__(self, key, value):
+ self.__dict__[key] = value
+
+ def __contains__(self, key):
+ return key in self.__dict__
+
+ def __repr__(self):
+ return repr(self.__dict__)
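+
+
+# Minimal usage sketch (hypothetical values):
+#   hps = HParams(train={"learning_rate": 1e-4}, data={"hop_length": 160})
+#   hps.train.learning_rate  -> 0.0001
+#   "data" in hps            -> True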
diff --git a/tabs/download/download.py b/tabs/download/download.py
new file mode 100644
index 0000000000000000000000000000000000000000..50f1255d7f8a9f2efd4bf2bfe399f938c31ab3ee
--- /dev/null
+++ b/tabs/download/download.py
@@ -0,0 +1,221 @@
+import os, sys, shutil
+import tempfile
+import gradio as gr
+import pandas as pd
+import requests
+import wget
+import json
+from core import run_download_script
+
+from assets.i18n.i18n import I18nAuto
+
+from rvc.lib.utils import format_title
+
+i18n = I18nAuto()
+
+now_dir = os.getcwd()
+sys.path.append(now_dir)
+
+gradio_temp_dir = os.path.join(tempfile.gettempdir(), "gradio")
+
+if os.path.exists(gradio_temp_dir):
+ shutil.rmtree(gradio_temp_dir)
+
+
+def save_drop_model(dropbox):
+    if not dropbox.endswith((".pth", ".index")):
+ raise gr.Error(
+ message="The file you dropped is not a valid model file. Please try again."
+ )
+ else:
+ file_name = format_title(os.path.basename(dropbox))
+ if ".pth" in dropbox:
+ model_name = format_title(file_name.split(".pth")[0])
+ else:
+ if "v2" not in dropbox:
+ model_name = format_title(
+ file_name.split("_nprobe_1_")[1].split("_v1")[0]
+ )
+ else:
+ model_name = format_title(
+ file_name.split("_nprobe_1_")[1].split("_v2")[0]
+ )
+ model_path = os.path.join(now_dir, "logs", model_name)
+ if not os.path.exists(model_path):
+ os.makedirs(model_path)
+ if os.path.exists(os.path.join(model_path, file_name)):
+ os.remove(os.path.join(model_path, file_name))
+ shutil.move(dropbox, os.path.join(model_path, file_name))
+ print(f"{file_name} saved in {model_path}")
+ gr.Info(f"{file_name} saved in {model_path}")
+ return None
+
+
+def search_models(name):
+ url = f"https://cjtfqzjfdimgpvpwhzlv.supabase.co/rest/v1/models?name=ilike.%25{name}%25&order=created_at.desc&limit=15"
+ headers = {
+ "apikey": "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpc3MiOiJzdXBhYmFzZSIsInJlZiI6ImNqdGZxempmZGltZ3B2cHdoemx2Iiwicm9sZSI6ImFub24iLCJpYXQiOjE2OTUxNjczODgsImV4cCI6MjAxMDc0MzM4OH0.7z5WMIbjR99c2Ooc0ma7B_FyGq10G8X-alkCYTkKR10"
+ }
+ response = requests.get(url, headers=headers)
+ data = response.json()
+ if len(data) == 0:
+ gr.Info(i18n("We couldn't find models by that name."))
+ return None
+ else:
+ df = pd.DataFrame(data)[["name", "link", "epochs", "type"]]
+ df["link"] = df["link"].apply(
+        lambda x: f'<a href="{x}">{x}</a>'
+ )
+ return df
+
+
+json_url = "https://huggingface.co/IAHispano/Applio/raw/main/pretrains.json"
+
+
+def fetch_pretrained_data():
+ pretraineds_custom_path = os.path.join(
+ "rvc", "models", "pretraineds", "pretraineds_custom"
+ )
+ os.makedirs(pretraineds_custom_path, exist_ok=True)
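+    # Resolution order: cached local copy -> remote pretrains.json -> built-in stub.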
+ try:
+ with open(
+ os.path.join(pretraineds_custom_path, json_url.split("/")[-1]), "r"
+ ) as f:
+ data = json.load(f)
+    except (OSError, json.JSONDecodeError):
+ try:
+ response = requests.get(json_url)
+ response.raise_for_status()
+ data = response.json()
+ with open(
+ os.path.join(pretraineds_custom_path, json_url.split("/")[-1]),
+ "w",
+ encoding="utf-8",
+ ) as f:
+ json.dump(
+ data,
+ f,
+ indent=2,
+ separators=(",", ": "),
+ ensure_ascii=False,
+ )
+        except Exception:
+ data = {
+ "Titan": {
+ "32k": {"D": "null", "G": "null"},
+ },
+ }
+ return data
+
+
+def get_pretrained_list():
+ data = fetch_pretrained_data()
+ return list(data.keys())
+
+
+def get_pretrained_sample_rates(model):
+ data = fetch_pretrained_data()
+ return list(data[model].keys())
+
+
+def download_pretrained_model(model, sample_rate):
+ data = fetch_pretrained_data()
+ paths = data[model][sample_rate]
+ pretraineds_custom_path = os.path.join(
+ "rvc", "models", "pretraineds", "pretraineds_custom"
+ )
+ os.makedirs(pretraineds_custom_path, exist_ok=True)
+
+ d_url = f"https://huggingface.co/{paths['D']}"
+ g_url = f"https://huggingface.co/{paths['G']}"
+
+ gr.Info("Downloading Pretrained Model...")
+ print("Downloading Pretrained Model...")
+ wget.download(d_url, out=pretraineds_custom_path)
+ wget.download(g_url, out=pretraineds_custom_path)
+
+
+def update_sample_rate_dropdown(model):
+ return {
+ "choices": get_pretrained_sample_rates(model),
+ "value": get_pretrained_sample_rates(model)[0],
+ "__type__": "update",
+ }
+
+
+def download_tab():
+ with gr.Column():
+ gr.Markdown(value=i18n("## Download Model"))
+ model_link = gr.Textbox(
+ label=i18n("Model Link"),
+            placeholder=i18n("Enter the model link"),
+ interactive=True,
+ )
+ model_download_output_info = gr.Textbox(
+ label=i18n("Output Information"),
+ info=i18n("The output information will be displayed here."),
+ value="",
+ max_lines=8,
+ interactive=False,
+ )
+ model_download_button = gr.Button(i18n("Download Model"))
+ model_download_button.click(
+ fn=run_download_script,
+ inputs=[model_link],
+ outputs=[model_download_output_info],
+ api_name="model_download",
+ )
+ gr.Markdown(value=i18n("## Drop files"))
+ dropbox = gr.File(
+ label=i18n(
+ "Drag your .pth file and .index file into this space. Drag one and then the other."
+ ),
+ type="filepath",
+ )
+
+ dropbox.upload(
+ fn=save_drop_model,
+ inputs=[dropbox],
+ outputs=[dropbox],
+ )
+ gr.Markdown(value=i18n("## Search Model"))
+ search_name = gr.Textbox(
+ label=i18n("Model Name"),
+            placeholder=i18n("Enter the model name to search."),
+ interactive=True,
+ )
+ search_table = gr.Dataframe(datatype="markdown")
+ search = gr.Button(i18n("Search"))
+ search.click(
+ fn=search_models,
+ inputs=[search_name],
+ outputs=[search_table],
+ )
+ search_name.submit(search_models, [search_name], search_table)
+ gr.Markdown(value=i18n("## Download Pretrained Models"))
+ pretrained_model = gr.Dropdown(
+ label=i18n("Pretrained"),
+ info=i18n("Select the pretrained model you want to download."),
+ choices=get_pretrained_list(),
+ value="Titan",
+ interactive=True,
+ )
+ pretrained_sample_rate = gr.Dropdown(
+ label=i18n("Sampling Rate"),
+            info=i18n("Select the sampling rate."),
+ choices=get_pretrained_sample_rates(pretrained_model.value),
+ value="40k",
+ interactive=True,
+ allow_custom_value=True,
+ )
+ pretrained_model.change(
+ update_sample_rate_dropdown,
+ inputs=[pretrained_model],
+ outputs=[pretrained_sample_rate],
+ )
+ download_pretrained = gr.Button(i18n("Download"))
+ download_pretrained.click(
+ fn=download_pretrained_model,
+ inputs=[pretrained_model, pretrained_sample_rate],
+ outputs=[],
+ )
diff --git a/tabs/extra/analyzer/analyzer.py b/tabs/extra/analyzer/analyzer.py
new file mode 100644
index 0000000000000000000000000000000000000000..7b05653c0b0ecd3c03f83f9a4b4321e23472f727
--- /dev/null
+++ b/tabs/extra/analyzer/analyzer.py
@@ -0,0 +1,32 @@
+import os, sys
+import gradio as gr
+
+now_dir = os.getcwd()
+sys.path.append(now_dir)
+
+from core import run_audio_analyzer_script
+from assets.i18n.i18n import I18nAuto
+
+i18n = I18nAuto()
+
+
+def analyzer_tab():
+ with gr.Column():
+ audio_input = gr.Audio(type="filepath")
+ output_info = gr.Textbox(
+ label=i18n("Output Information"),
+ info=i18n("The output information will be displayed here."),
+ value="",
+ max_lines=8,
+ interactive=False,
+ )
+ get_info_button = gr.Button(
+ value=i18n("Get information about the audio"), variant="primary"
+ )
+ image_output = gr.Image(type="filepath", interactive=False)
+
+ get_info_button.click(
+ fn=run_audio_analyzer_script,
+ inputs=[audio_input],
+ outputs=[output_info, image_output],
+ )
diff --git a/tabs/extra/extra.py b/tabs/extra/extra.py
new file mode 100644
index 0000000000000000000000000000000000000000..49d063d4a1af9f971ab4c8fb5f6e3abfd194dff5
--- /dev/null
+++ b/tabs/extra/extra.py
@@ -0,0 +1,35 @@
+import gradio as gr
+
+import os
+import sys
+
+now_dir = os.getcwd()
+sys.path.append(now_dir)
+
+from tabs.extra.processing.processing import processing_tab
+from tabs.extra.analyzer.analyzer import analyzer_tab
+from tabs.extra.f0_extractor.f0_extractor import f0_extractor_tab
+from tabs.extra.model_author.model_author import model_author_tab
+
+from assets.i18n.i18n import I18nAuto
+
+i18n = I18nAuto()
+
+
+def extra_tab():
+ gr.Markdown(
+ value=i18n(
+            "This section contains extra utilities that are often still experimental."
+ )
+ )
+ with gr.TabItem(i18n("Model information")):
+ processing_tab()
+
+ with gr.TabItem(i18n("F0 Curve")):
+ f0_extractor_tab()
+
+ with gr.TabItem(i18n("Audio Analyzer")):
+ analyzer_tab()
+
+ with gr.TabItem(i18n("Model Author")):
+ model_author_tab()
diff --git a/tabs/extra/f0_extractor/f0_extractor.py b/tabs/extra/f0_extractor/f0_extractor.py
new file mode 100644
index 0000000000000000000000000000000000000000..31bedecd8147d0f22425fded9106ab5c77d45216
--- /dev/null
+++ b/tabs/extra/f0_extractor/f0_extractor.py
@@ -0,0 +1,69 @@
+import librosa
+import gradio as gr
+import os
+from matplotlib import pyplot as plt
+
+from rvc.lib.predictors.F0Extractor import F0Extractor
+
+from assets.i18n.i18n import I18nAuto
+
+i18n = I18nAuto()
+
+
+def extract_f0_curve(audio_path: str, method: str) -> tuple:
+ print("Extracting F0 Curve...")
+ image_path = os.path.join("logs", "f0_plot.png")
+ txt_path = os.path.join("logs", "f0_curve.txt")
+ y, sr = librosa.load(audio_path, sr=None)
+ hop_length = 160
+
+ f0_extractor = F0Extractor(audio_path, sample_rate=sr, method=method)
+ f0 = f0_extractor.extract_f0()
+
+ plt.figure(figsize=(10, 4))
+ plt.plot(f0)
+ plt.title(method)
+ plt.xlabel("Time (frames)")
+ plt.ylabel("Frequency (Hz)")
+ plt.savefig(image_path)
+ plt.close()
+
+ with open(txt_path, "w") as txtfile:
+ for i, f0_value in enumerate(f0):
+            time_s = i * hop_length / sr  # frame index -> seconds (assumes the 160-sample hop above)
+            txtfile.write(f"{time_s},{f0_value}\n")
+
+ print("F0 Curve extracted successfully!")
+ return image_path, txt_path
+
+
+def f0_extractor_tab():
+ with gr.Accordion(label=i18n("Extract F0 Curve")):
+ with gr.Row():
+ audio = gr.Audio(label=i18n("Upload Audio"), type="filepath")
+ f0_method = gr.Radio(
+ label=i18n("Pitch extraction algorithm"),
+ info=i18n(
+ "Pitch extraction algorithm to use for the audio conversion. The default algorithm is rmvpe, which is recommended for most cases."
+ ),
+ choices=["crepe", "fcpe", "rmvpe"],
+ value="rmvpe",
+ )
+ button = gr.Button(i18n("Extract F0 Curve"), variant="primary")
+
+ with gr.Accordion(label=i18n("Output Information")):
+ txt_output = gr.File(label="F0 Curve", type="filepath")
+ image_output = gr.Image(type="filepath", interactive=False)
+
+ button.click(
+ fn=extract_f0_curve,
+ inputs=[
+ audio,
+ f0_method,
+ ],
+ outputs=[image_output, txt_output],
+ api_name="f0_extract",
+ )
diff --git a/tabs/extra/model_author/model_author.py b/tabs/extra/model_author/model_author.py
new file mode 100644
index 0000000000000000000000000000000000000000..9ee8e11bd845933bd4535dc4eafd7815673a2b0e
--- /dev/null
+++ b/tabs/extra/model_author/model_author.py
@@ -0,0 +1,34 @@
+import os, sys
+
+now_dir = os.getcwd()
+sys.path.append(now_dir)
+
+import gradio as gr
+from assets.i18n.i18n import I18nAuto
+
+from core import run_model_author_script
+
+i18n = I18nAuto()
+
+
+def model_author_tab():
+ model_author_name = gr.Textbox(
+ label=i18n("Model Author Name"),
+ info=i18n("The name that will appear in the model information."),
+ placeholder=i18n("Enter your nickname"),
+ interactive=True,
+ )
+ model_author_output_info = gr.Textbox(
+ label=i18n("Output Information"),
+ info=i18n("The output information will be displayed here."),
+ value="",
+ max_lines=1,
+ )
+ button = gr.Button(i18n("Set name"), variant="primary")
+
+ button.click(
+ fn=run_model_author_script,
+ inputs=[model_author_name],
+ outputs=[model_author_output_info],
+ api_name="model_author",
+ )
diff --git a/tabs/extra/model_information.py b/tabs/extra/model_information.py
new file mode 100644
index 0000000000000000000000000000000000000000..de0fc73d22be343e12f4fef994300bca89f3daa0
--- /dev/null
+++ b/tabs/extra/model_information.py
@@ -0,0 +1,30 @@
+import gradio as gr
+from core import run_model_information_script
+
+from assets.i18n.i18n import I18nAuto
+
+i18n = I18nAuto()
+
+
+def model_information_tab():
+ with gr.Column():
+ model_name = gr.Textbox(
+ label=i18n("Path to Model"),
+            info=i18n("Enter the path to the model .pth file"),
+            placeholder=i18n("Enter the path to the model .pth file"),
+ interactive=True,
+ )
+ model_information_output_info = gr.Textbox(
+ label=i18n("Output Information"),
+ info=i18n("The output information will be displayed here."),
+ value="",
+ max_lines=11,
+ interactive=False,
+ )
+ model_information_button = gr.Button(i18n("See Model Information"))
+ model_information_button.click(
+ fn=run_model_information_script,
+ inputs=[model_name],
+ outputs=[model_information_output_info],
+ api_name="model_information",
+ )
diff --git a/tabs/extra/processing/processing.py b/tabs/extra/processing/processing.py
new file mode 100644
index 0000000000000000000000000000000000000000..1eb037ac14e92cf25fec0e605fcd8784f7d80176
--- /dev/null
+++ b/tabs/extra/processing/processing.py
@@ -0,0 +1,38 @@
+import os, sys
+
+now_dir = os.getcwd()
+sys.path.append(now_dir)
+
+from core import run_model_information_script
+from assets.i18n.i18n import I18nAuto
+
+i18n = I18nAuto()
+
+import gradio as gr
+
+
+def processing_tab():
+ with gr.Accordion(label=i18n("View model information")):
+ with gr.Row():
+ with gr.Column():
+ model_view_model_path = gr.Textbox(
+ label=i18n("Path to Model"),
+                    info=i18n("Enter the path to the model .pth file"),
+ value="",
+ interactive=True,
+ placeholder=i18n("Enter path to model"),
+ )
+
+ model_view_output_info = gr.Textbox(
+ label=i18n("Output Information"),
+ info=i18n("The output information will be displayed here."),
+ value="",
+ max_lines=11,
+ )
+ model_view_button = gr.Button(i18n("View"), variant="primary")
+ model_view_button.click(
+ fn=run_model_information_script,
+ inputs=[model_view_model_path],
+ outputs=[model_view_output_info],
+ api_name="model_info",
+ )
diff --git a/tabs/inference/inference.py b/tabs/inference/inference.py
new file mode 100644
index 0000000000000000000000000000000000000000..7544c6be993691e08c61dc7dac0cbbc1ffde8ddf
--- /dev/null
+++ b/tabs/inference/inference.py
@@ -0,0 +1,2146 @@
+import os, sys
+import gradio as gr
+import regex as re
+import shutil
+import datetime
+import json
+
+from core import (
+ run_infer_script,
+ run_batch_infer_script,
+)
+
+from assets.i18n.i18n import I18nAuto
+
+from rvc.lib.utils import format_title
+from tabs.settings.restart import stop_infer
+
+i18n = I18nAuto()
+
+now_dir = os.getcwd()
+sys.path.append(now_dir)
+
+model_root = os.path.join(now_dir, "logs")
+audio_root = os.path.join(now_dir, "assets", "audios")
+custom_embedder_root = os.path.join(
+ now_dir, "rvc", "models", "embedders", "embedders_custom"
+)
+
+PRESETS_DIR = os.path.join(now_dir, "assets", "presets")
+FORMANTSHIFT_DIR = os.path.join(now_dir, "assets", "formant_shift")
+
+os.makedirs(custom_embedder_root, exist_ok=True)
+
+custom_embedder_root_relative = os.path.relpath(custom_embedder_root, now_dir)
+model_root_relative = os.path.relpath(model_root, now_dir)
+audio_root_relative = os.path.relpath(audio_root, now_dir)
+
+sup_audioext = {
+ "wav",
+ "mp3",
+ "flac",
+ "ogg",
+ "opus",
+ "m4a",
+ "mp4",
+ "aac",
+ "alac",
+ "wma",
+ "aiff",
+ "webm",
+ "ac3",
+}
+
+names = [
+ os.path.join(root, file)
+ for root, _, files in os.walk(model_root_relative, topdown=False)
+ for file in files
+ if (
+ file.endswith((".pth", ".onnx"))
+ and not (file.startswith("G_") or file.startswith("D_"))
+ )
+]
+
+indexes_list = [
+ os.path.join(root, name)
+ for root, _, files in os.walk(model_root_relative, topdown=False)
+ for name in files
+ if name.endswith(".index") and "trained" not in name
+]
+
+audio_paths = [
+ os.path.join(root, name)
+ for root, _, files in os.walk(audio_root_relative, topdown=False)
+ for name in files
+ if name.endswith(tuple(sup_audioext))
+ and root == audio_root_relative
+ and "_output" not in name
+]
+
+custom_embedders = [
+ os.path.join(dirpath, dirname)
+ for dirpath, dirnames, _ in os.walk(custom_embedder_root_relative)
+ for dirname in dirnames
+]
+
+
+def update_sliders(preset):
+ with open(
+ os.path.join(PRESETS_DIR, f"{preset}.json"), "r", encoding="utf-8"
+ ) as json_file:
+ values = json.load(json_file)
+ return (
+ values["pitch"],
+ values["filter_radius"],
+ values["index_rate"],
+ values["rms_mix_rate"],
+ values["protect"],
+ )
+
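+# A preset JSON in assets/presets is expected to carry these keys
+# (values illustrative):
+#   {"pitch": 0, "filter_radius": 3, "index_rate": 0.75,
+#    "rms_mix_rate": 1.0, "protect": 0.5}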
+
+def update_sliders_formant(preset):
+ with open(
+ os.path.join(FORMANTSHIFT_DIR, f"{preset}.json"), "r", encoding="utf-8"
+ ) as json_file:
+ values = json.load(json_file)
+ return (
+ values["formant_qfrency"],
+ values["formant_timbre"],
+ )
+
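+# Formant-shift presets in assets/formant_shift follow the same idea:
+#   {"formant_qfrency": 1.0, "formant_timbre": 1.0}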
+
+def export_presets(presets, file_path):
+ with open(file_path, "w", encoding="utf-8") as json_file:
+ json.dump(presets, json_file, ensure_ascii=False, indent=4)
+
+
+def import_presets(file_path):
+ with open(file_path, "r", encoding="utf-8") as json_file:
+ presets = json.load(json_file)
+ return presets
+
+
+def get_presets_data(pitch, filter_radius, index_rate, rms_mix_rate, protect):
+ return {
+ "pitch": pitch,
+ "filter_radius": filter_radius,
+ "index_rate": index_rate,
+ "rms_mix_rate": rms_mix_rate,
+ "protect": protect,
+ }
+
+
+def export_presets_button(
+ preset_name, pitch, filter_radius, index_rate, rms_mix_rate, protect
+):
+ if preset_name:
+ file_path = os.path.join(PRESETS_DIR, f"{preset_name}.json")
+ presets_data = get_presets_data(
+ pitch, filter_radius, index_rate, rms_mix_rate, protect
+ )
+ with open(file_path, "w", encoding="utf-8") as json_file:
+ json.dump(presets_data, json_file, ensure_ascii=False, indent=4)
+ return "Export successful"
+ return "Export cancelled"
+
+
+def import_presets_button(file_path):
+ if file_path:
+ imported_presets = import_presets(file_path.name)
+ return (
+ list(imported_presets.keys()),
+ imported_presets,
+ "Presets imported successfully!",
+ )
+ return [], {}, "No file selected for import."
+
+
+def list_json_files(directory):
+ return [f.rsplit(".", 1)[0] for f in os.listdir(directory) if f.endswith(".json")]
+
+
+def refresh_presets():
+ json_files = list_json_files(PRESETS_DIR)
+ return gr.update(choices=json_files)
+
+
+def output_path_fn(input_audio_path):
+ original_name_without_extension = os.path.basename(input_audio_path).rsplit(".", 1)[
+ 0
+ ]
+ new_name = original_name_without_extension + "_output.wav"
+ output_path = os.path.join(os.path.dirname(input_audio_path), new_name)
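+    # e.g. "assets/audios/song.mp3" -> "assets/audios/song_output.wav"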
+ return output_path
+
+
+def change_choices():
+ names = [
+ os.path.join(root, file)
+ for root, _, files in os.walk(model_root_relative, topdown=False)
+ for file in files
+ if (
+ file.endswith((".pth", ".onnx"))
+ and not (file.startswith("G_") or file.startswith("D_"))
+ )
+ ]
+
+ indexes_list = [
+ os.path.join(root, name)
+ for root, _, files in os.walk(model_root_relative, topdown=False)
+ for name in files
+ if name.endswith(".index") and "trained" not in name
+ ]
+
+ audio_paths = [
+ os.path.join(root, name)
+ for root, _, files in os.walk(audio_root_relative, topdown=False)
+ for name in files
+ if name.endswith(tuple(sup_audioext))
+ and root == audio_root_relative
+ and "_output" not in name
+ ]
+
+ return (
+ {"choices": sorted(names), "__type__": "update"},
+ {"choices": sorted(indexes_list), "__type__": "update"},
+ {"choices": sorted(audio_paths), "__type__": "update"},
+ )
+
+
+def get_indexes():
+ indexes_list = [
+ os.path.join(dirpath, filename)
+ for dirpath, _, filenames in os.walk(model_root_relative)
+ for filename in filenames
+ if filename.endswith(".index") and "trained" not in filename
+ ]
+
+    return indexes_list
+
+
+def save_to_wav(record_button):
+    if record_button is None:
+        # Return a pair so the two-output Gradio event always unpacks cleanly.
+        return None, None
+    path_to_file = record_button
+    new_name = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S") + ".wav"
+    target_path = os.path.join(audio_root_relative, os.path.basename(new_name))
+
+    shutil.move(path_to_file, target_path)
+    return target_path, output_path_fn(target_path)
+
+
+def save_to_wav2(upload_audio):
+ file_path = upload_audio
+    formatted_name = format_title(os.path.basename(file_path))
+    target_path = os.path.join(audio_root_relative, formatted_name)
+
+ if os.path.exists(target_path):
+ os.remove(target_path)
+
+ shutil.copy(file_path, target_path)
+ return target_path, output_path_fn(target_path)
+
+
+def delete_outputs():
+    gr.Info("Outputs cleared!")
+ for root, _, files in os.walk(audio_root_relative, topdown=False):
+ for name in files:
+            if name.endswith(tuple(sup_audioext)) and "_output" in name:
+ os.remove(os.path.join(root, name))
+
+
+def match_index(model_file_value):
+ if model_file_value:
+ model_folder = os.path.dirname(model_file_value)
+ model_name = os.path.basename(model_file_value)
+ index_files = get_indexes()
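+        # Return the first index that sits in the model's own folder, shares
+        # the model's name prefix, or contains the model name.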
+ pattern = r"^(.*?)_"
+ match = re.match(pattern, model_name)
+ for index_file in index_files:
+ if os.path.dirname(index_file) == model_folder:
+ return index_file
+ elif match and match.group(1) in os.path.basename(index_file):
+ return index_file
+ elif model_name in os.path.basename(index_file):
+ return index_file
+ return ""
+
+
+def create_folder_and_move_files(folder_name, bin_file, config_file):
+ if not folder_name:
+ return "Folder name must not be empty."
+
+ folder_name = os.path.join(custom_embedder_root, folder_name)
+ os.makedirs(folder_name, exist_ok=True)
+
+ if bin_file:
+ bin_file_path = os.path.join(folder_name, os.path.basename(bin_file))
+ shutil.copy(bin_file, bin_file_path)
+
+ if config_file:
+ config_file_path = os.path.join(folder_name, os.path.basename(config_file))
+ shutil.copy(config_file, config_file_path)
+
+ return f"Files moved to folder {folder_name}"
+
+
+def refresh_formant():
+ json_files = list_json_files(FORMANTSHIFT_DIR)
+ return gr.update(choices=json_files)
+
+
+def refresh_embedders_folders():
+ custom_embedders = [
+ os.path.join(dirpath, dirname)
+ for dirpath, dirnames, _ in os.walk(custom_embedder_root_relative)
+ for dirname in dirnames
+ ]
+ return custom_embedders
+
+
+# Inference tab
+def inference_tab():
+ default_weight = names[0] if names else None
+ with gr.Row():
+ with gr.Row():
+ model_file = gr.Dropdown(
+ label=i18n("Voice Model"),
+ info=i18n("Select the voice model to use for the conversion."),
+ choices=sorted(names, key=lambda path: os.path.getsize(path)),
+ interactive=True,
+ value=default_weight,
+ allow_custom_value=True,
+ )
+
+ index_file = gr.Dropdown(
+ label=i18n("Index File"),
+ info=i18n("Select the index file to use for the conversion."),
+ choices=get_indexes(),
+ value=match_index(default_weight) if default_weight else "",
+ interactive=True,
+ allow_custom_value=True,
+ )
+ with gr.Column():
+ refresh_button = gr.Button(i18n("Refresh"))
+ unload_button = gr.Button(i18n("Unload Voice"))
+
+ unload_button.click(
+ fn=lambda: (
+ {"value": "", "__type__": "update"},
+ {"value": "", "__type__": "update"},
+ ),
+ inputs=[],
+ outputs=[model_file, index_file],
+ )
+
+ model_file.select(
+ fn=lambda model_file_value: match_index(model_file_value),
+ inputs=[model_file],
+ outputs=[index_file],
+ )
+
+ # Single inference tab
+ with gr.Tab(i18n("Single")):
+ with gr.Column():
+ upload_audio = gr.Audio(
+ label=i18n("Upload Audio"), type="filepath", editable=False
+ )
+ with gr.Row():
+ audio = gr.Dropdown(
+ label=i18n("Select Audio"),
+ info=i18n("Select the audio to convert."),
+ choices=sorted(audio_paths),
+ value=audio_paths[0] if audio_paths else "",
+ interactive=True,
+ allow_custom_value=True,
+ )
+
+ with gr.Accordion(i18n("Advanced Settings"), open=False):
+ with gr.Column():
+ clear_outputs_infer = gr.Button(
+ i18n("Clear Outputs (Deletes all audios in assets/audios)")
+ )
+ output_path = gr.Textbox(
+ label=i18n("Output Path"),
+ placeholder=i18n("Enter output path"),
+ info=i18n(
+ "The path where the output audio will be saved, by default in assets/audios/output.wav"
+ ),
+ value=(
+ output_path_fn(audio_paths[0])
+ if audio_paths
+ else os.path.join(now_dir, "assets", "audios", "output.wav")
+ ),
+ interactive=True,
+ )
+ export_format = gr.Radio(
+ label=i18n("Export Format"),
+ info=i18n("Select the format to export the audio."),
+ choices=["WAV", "MP3", "FLAC", "OGG", "M4A"],
+ value="WAV",
+ interactive=True,
+ )
+ split_audio = gr.Checkbox(
+ label=i18n("Split Audio"),
+ info=i18n(
+ "Split the audio into chunks for inference to obtain better results in some cases."
+ ),
+ visible=True,
+ value=False,
+ interactive=True,
+ )
+ autotune = gr.Checkbox(
+ label=i18n("Autotune"),
+ info=i18n(
+ "Apply a soft autotune to your inferences, recommended for singing conversions."
+ ),
+ visible=True,
+ value=False,
+ interactive=True,
+ )
+ clean_audio = gr.Checkbox(
+ label=i18n("Clean Audio"),
+ info=i18n(
+ "Clean your audio output using noise detection algorithms, recommended for speaking audios."
+ ),
+ visible=True,
+ value=False,
+ interactive=True,
+ )
+ clean_strength = gr.Slider(
+ minimum=0,
+ maximum=1,
+ label=i18n("Clean Strength"),
+ info=i18n(
+                            "Set the desired clean-up level; higher values clean more aggressively, but the audio may end up sounding more compressed."
+ ),
+ visible=False,
+ value=0.5,
+ interactive=True,
+ )
+ upscale_audio = gr.Checkbox(
+ label=i18n("Upscale Audio"),
+ info=i18n(
+ "Upscale the audio to a higher quality, recommended for low-quality audios. (It could take longer to process the audio)"
+ ),
+ visible=True,
+ value=False,
+ interactive=True,
+ )
+ formant_shifting = gr.Checkbox(
+ label=i18n("Formant Shifting"),
+ info=i18n(
+                            "Enable formant shifting. Used for male to female and vice-versa conversions."
+ ),
+ value=False,
+ visible=True,
+ interactive=True,
+ )
+ with gr.Row():
+ formant_preset = gr.Dropdown(
+ label=i18n("Browse presets for formanting"),
+ info=i18n(
+ "Presets are located in /assets/formant_shift folder"
+ ),
+ choices=list_json_files(FORMANTSHIFT_DIR),
+ visible=False,
+ interactive=True,
+ )
+ formant_refresh_button = gr.Button(
+ value="Refresh",
+ visible=False,
+ )
+ formant_qfrency = gr.Slider(
+ value=1.0,
+ info=i18n("Default value is 1.0"),
+ label=i18n("Quefrency for formant shifting"),
+ minimum=0.0,
+ maximum=16.0,
+ step=0.1,
+ visible=False,
+ interactive=True,
+ )
+ formant_timbre = gr.Slider(
+ value=1.0,
+ info=i18n("Default value is 1.0"),
+ label=i18n("Timbre for formant shifting"),
+ minimum=0.0,
+ maximum=16.0,
+ step=0.1,
+ visible=False,
+ interactive=True,
+ )
+ post_process = gr.Checkbox(
+ label=i18n("Post-Process"),
+ info=i18n("Post-process the audio to apply effects to the output."),
+ value=False,
+ interactive=True,
+ )
+ reverb = gr.Checkbox(
+ label=i18n("Reverb"),
+ info=i18n("Apply reverb to the audio."),
+ value=False,
+ interactive=True,
+ visible=False,
+ )
+ reverb_room_size = gr.Slider(
+ minimum=0,
+ maximum=1,
+ label=i18n("Reverb Room Size"),
+ info=i18n("Set the room size of the reverb."),
+ value=0.5,
+ interactive=True,
+ visible=False,
+ )
+
+ reverb_damping = gr.Slider(
+ minimum=0,
+ maximum=1,
+ label=i18n("Reverb Damping"),
+ info=i18n("Set the damping of the reverb."),
+ value=0.5,
+ interactive=True,
+ visible=False,
+ )
+
+ reverb_wet_gain = gr.Slider(
+ minimum=0,
+ maximum=1,
+ label=i18n("Reverb Wet Gain"),
+ info=i18n("Set the wet gain of the reverb."),
+ value=0.33,
+ interactive=True,
+ visible=False,
+ )
+
+ reverb_dry_gain = gr.Slider(
+ minimum=0,
+ maximum=1,
+ label=i18n("Reverb Dry Gain"),
+ info=i18n("Set the dry gain of the reverb."),
+ value=0.4,
+ interactive=True,
+ visible=False,
+ )
+
+ reverb_width = gr.Slider(
+ minimum=0,
+ maximum=1,
+ label=i18n("Reverb Width"),
+ info=i18n("Set the width of the reverb."),
+ value=1.0,
+ interactive=True,
+ visible=False,
+ )
+
+ reverb_freeze_mode = gr.Slider(
+ minimum=0,
+ maximum=1,
+ label=i18n("Reverb Freeze Mode"),
+ info=i18n("Set the freeze mode of the reverb."),
+ value=0.0,
+ interactive=True,
+ visible=False,
+ )
+ pitch_shift = gr.Checkbox(
+ label=i18n("Pitch Shift"),
+ info=i18n("Apply pitch shift to the audio."),
+ value=False,
+ interactive=True,
+ visible=False,
+ )
+ pitch_shift_semitones = gr.Slider(
+ minimum=-12,
+ maximum=12,
+ label=i18n("Pitch Shift Semitones"),
+ info=i18n("Set the pitch shift semitones."),
+ value=0,
+ interactive=True,
+ visible=False,
+ )
+ limiter = gr.Checkbox(
+ label=i18n("Limiter"),
+ info=i18n("Apply limiter to the audio."),
+ value=False,
+ interactive=True,
+ visible=False,
+ )
+ limiter_threshold = gr.Slider(
+ minimum=-60,
+ maximum=0,
+ label=i18n("Limiter Threshold dB"),
+ info=i18n("Set the limiter threshold dB."),
+ value=-6,
+ interactive=True,
+ visible=False,
+ )
+
+ limiter_release_time = gr.Slider(
+ minimum=0.01,
+ maximum=1,
+ label=i18n("Limiter Release Time"),
+ info=i18n("Set the limiter release time."),
+ value=0.05,
+ interactive=True,
+ visible=False,
+ )
+ gain = gr.Checkbox(
+ label=i18n("Gain"),
+ info=i18n("Apply gain to the audio."),
+ value=False,
+ interactive=True,
+ visible=False,
+ )
+ gain_db = gr.Slider(
+ minimum=-60,
+ maximum=60,
+ label=i18n("Gain dB"),
+ info=i18n("Set the gain dB."),
+ value=0,
+ interactive=True,
+ visible=False,
+ )
+ distortion = gr.Checkbox(
+ label=i18n("Distortion"),
+ info=i18n("Apply distortion to the audio."),
+ value=False,
+ interactive=True,
+ visible=False,
+ )
+ distortion_gain = gr.Slider(
+ minimum=-60,
+ maximum=60,
+ label=i18n("Distortion Gain"),
+ info=i18n("Set the distortion gain."),
+ value=25,
+ interactive=True,
+ visible=False,
+ )
+ chorus = gr.Checkbox(
+                        label=i18n("Chorus"),
+ info=i18n("Apply chorus to the audio."),
+ value=False,
+ interactive=True,
+ visible=False,
+ )
+ chorus_rate = gr.Slider(
+ minimum=0,
+ maximum=100,
+ label=i18n("Chorus Rate Hz"),
+ info=i18n("Set the chorus rate Hz."),
+ value=1.0,
+ interactive=True,
+ visible=False,
+ )
+
+ chorus_depth = gr.Slider(
+ minimum=0,
+ maximum=1,
+                        label=i18n("Chorus Depth"),
+ info=i18n("Set the chorus depth."),
+ value=0.25,
+ interactive=True,
+ visible=False,
+ )
+
+ chorus_center_delay = gr.Slider(
+ minimum=7,
+ maximum=8,
+                        label=i18n("Chorus Center Delay ms"),
+ info=i18n("Set the chorus center delay ms."),
+ value=7,
+ interactive=True,
+ visible=False,
+ )
+
+ chorus_feedback = gr.Slider(
+ minimum=0,
+ maximum=1,
+                        label=i18n("Chorus Feedback"),
+ info=i18n("Set the chorus feedback."),
+ value=0.0,
+ interactive=True,
+ visible=False,
+ )
+
+ chorus_mix = gr.Slider(
+ minimum=0,
+ maximum=1,
+ label=i18n("Chorus Mix"),
+ info=i18n("Set the chorus mix."),
+ value=0.5,
+ interactive=True,
+ visible=False,
+ )
+ bitcrush = gr.Checkbox(
+ label=i18n("Bitcrush"),
+ info=i18n("Apply bitcrush to the audio."),
+ value=False,
+ interactive=True,
+ visible=False,
+ )
+ bitcrush_bit_depth = gr.Slider(
+ minimum=1,
+ maximum=32,
+ label=i18n("Bitcrush Bit Depth"),
+ info=i18n("Set the bitcrush bit depth."),
+ value=8,
+ interactive=True,
+ visible=False,
+ )
+ clipping = gr.Checkbox(
+ label=i18n("Clipping"),
+ info=i18n("Apply clipping to the audio."),
+ value=False,
+ interactive=True,
+ visible=False,
+ )
+ clipping_threshold = gr.Slider(
+ minimum=-60,
+ maximum=0,
+ label=i18n("Clipping Threshold"),
+ info=i18n("Set the clipping threshold."),
+ value=-6,
+ interactive=True,
+ visible=False,
+ )
+ compressor = gr.Checkbox(
+ label=i18n("Compressor"),
+ info=i18n("Apply compressor to the audio."),
+ value=False,
+ interactive=True,
+ visible=False,
+ )
+ compressor_threshold = gr.Slider(
+ minimum=-60,
+ maximum=0,
+ label=i18n("Compressor Threshold dB"),
+ info=i18n("Set the compressor threshold dB."),
+ value=0,
+ interactive=True,
+ visible=False,
+ )
+
+ compressor_ratio = gr.Slider(
+ minimum=1,
+ maximum=20,
+ label=i18n("Compressor Ratio"),
+ info=i18n("Set the compressor ratio."),
+ value=1,
+ interactive=True,
+ visible=False,
+ )
+
+ compressor_attack = gr.Slider(
+ minimum=0.0,
+ maximum=100,
+ label=i18n("Compressor Attack ms"),
+ info=i18n("Set the compressor attack ms."),
+ value=1.0,
+ interactive=True,
+ visible=False,
+ )
+
+ compressor_release = gr.Slider(
+ minimum=0.01,
+ maximum=100,
+ label=i18n("Compressor Release ms"),
+ info=i18n("Set the compressor release ms."),
+ value=100,
+ interactive=True,
+ visible=False,
+ )
+ delay = gr.Checkbox(
+ label=i18n("Delay"),
+ info=i18n("Apply delay to the audio."),
+ value=False,
+ interactive=True,
+ visible=False,
+ )
+ delay_seconds = gr.Slider(
+ minimum=0.0,
+ maximum=5.0,
+ label=i18n("Delay Seconds"),
+ info=i18n("Set the delay seconds."),
+ value=0.5,
+ interactive=True,
+ visible=False,
+ )
+
+ delay_feedback = gr.Slider(
+ minimum=0.0,
+ maximum=1.0,
+ label=i18n("Delay Feedback"),
+ info=i18n("Set the delay feedback."),
+ value=0.0,
+ interactive=True,
+ visible=False,
+ )
+
+ delay_mix = gr.Slider(
+ minimum=0.0,
+ maximum=1.0,
+ label=i18n("Delay Mix"),
+ info=i18n("Set the delay mix."),
+ value=0.5,
+ interactive=True,
+ visible=False,
+ )
+ with gr.Accordion(i18n("Preset Settings"), open=False):
+ with gr.Row():
+ preset_dropdown = gr.Dropdown(
+ label=i18n("Select Custom Preset"),
+ choices=list_json_files(PRESETS_DIR),
+ interactive=True,
+ )
+ presets_refresh_button = gr.Button(i18n("Refresh Presets"))
+ import_file = gr.File(
+ label=i18n("Select file to import"),
+ file_count="single",
+ type="filepath",
+ interactive=True,
+ )
+ import_file.change(
+ import_presets_button,
+ inputs=import_file,
+ outputs=[preset_dropdown],
+ )
+ presets_refresh_button.click(
+ refresh_presets, outputs=preset_dropdown
+ )
+ with gr.Row():
+ preset_name_input = gr.Textbox(
+ label=i18n("Preset Name"),
+ placeholder=i18n("Enter preset name"),
+ )
+ export_button = gr.Button(i18n("Export Preset"))
+ pitch = gr.Slider(
+ minimum=-24,
+ maximum=24,
+ step=1,
+ label=i18n("Pitch"),
+ info=i18n(
+                            "Set the pitch of the audio; the higher the value, the higher the pitch."
+ ),
+ value=0,
+ interactive=True,
+ )
+ filter_radius = gr.Slider(
+ minimum=0,
+ maximum=7,
+ label=i18n("Filter Radius"),
+ info=i18n(
+                            "If the value is three or higher, median filtering is applied to the extracted pitch results, which can reduce breathiness."
+ ),
+ value=3,
+ step=1,
+ interactive=True,
+ )
+ index_rate = gr.Slider(
+ minimum=0,
+ maximum=1,
+ label=i18n("Search Feature Ratio"),
+ info=i18n(
+ "Influence exerted by the index file; a higher value corresponds to greater influence. However, opting for lower values can help mitigate artifacts present in the audio."
+ ),
+ value=0.75,
+ interactive=True,
+ )
+ rms_mix_rate = gr.Slider(
+ minimum=0,
+ maximum=1,
+ label=i18n("Volume Envelope"),
+ info=i18n(
+ "Substitute or blend with the volume envelope of the output. The closer the ratio is to 1, the more the output envelope is employed."
+ ),
+ value=1,
+ interactive=True,
+ )
+ protect = gr.Slider(
+ minimum=0,
+ maximum=0.5,
+ label=i18n("Protect Voiceless Consonants"),
+ info=i18n(
+ "Safeguard distinct consonants and breathing sounds to prevent electro-acoustic tearing and other artifacts. Pulling the parameter to its maximum value of 0.5 offers comprehensive protection. However, reducing this value might decrease the extent of protection while potentially mitigating the indexing effect."
+ ),
+ value=0.5,
+ interactive=True,
+ )
+ preset_dropdown.change(
+ update_sliders,
+ inputs=preset_dropdown,
+ outputs=[
+ pitch,
+ filter_radius,
+ index_rate,
+ rms_mix_rate,
+ protect,
+ ],
+ )
+ export_button.click(
+ export_presets_button,
+ inputs=[
+ preset_name_input,
+ pitch,
+ filter_radius,
+ index_rate,
+ rms_mix_rate,
+ protect,
+ ],
+ )
+ hop_length = gr.Slider(
+ minimum=1,
+ maximum=512,
+ step=1,
+ label=i18n("Hop Length"),
+ info=i18n(
+ "Denotes the duration it takes for the system to transition to a significant pitch change. Smaller hop lengths require more time for inference but tend to yield higher pitch accuracy."
+ ),
+ visible=False,
+ value=128,
+ interactive=True,
+ )
+ f0_method = gr.Radio(
+ label=i18n("Pitch extraction algorithm"),
+ info=i18n(
+ "Pitch extraction algorithm to use for the audio conversion. The default algorithm is rmvpe, which is recommended for most cases."
+ ),
+ choices=[
+ "crepe",
+ "crepe-tiny",
+ "rmvpe",
+ "fcpe",
+ "hybrid[rmvpe+fcpe]",
+ ],
+ value="rmvpe",
+ interactive=True,
+ )
+ embedder_model = gr.Radio(
+ label=i18n("Embedder Model"),
+ info=i18n("Model used for learning speaker embedding."),
+ choices=[
+ "contentvec",
+ "chinese-hubert-base",
+ "japanese-hubert-base",
+ "korean-hubert-base",
+ "custom",
+ ],
+ value="contentvec",
+ interactive=True,
+ )
+ with gr.Column(visible=False) as embedder_custom:
+ with gr.Accordion(i18n("Custom Embedder"), open=True):
+ with gr.Row():
+ embedder_model_custom = gr.Dropdown(
+ label=i18n("Select Custom Embedder"),
+ choices=refresh_embedders_folders(),
+ interactive=True,
+ allow_custom_value=True,
+ )
+ refresh_embedders_button = gr.Button(
+ i18n("Refresh embedders")
+ )
+ folder_name_input = gr.Textbox(
+ label=i18n("Folder Name"), interactive=True
+ )
+ with gr.Row():
+ bin_file_upload = gr.File(
+ label=i18n("Upload .bin"),
+ type="filepath",
+ interactive=True,
+ )
+ config_file_upload = gr.File(
+ label=i18n("Upload .json"),
+ type="filepath",
+ interactive=True,
+ )
+ move_files_button = gr.Button(
+ i18n("Move files to custom embedder folder")
+ )
+
+ f0_file = gr.File(
+ label=i18n(
+ "The f0 curve represents the variations in the base frequency of a voice over time, showing how pitch rises and falls."
+ ),
+ visible=True,
+ )
+
+ convert_button1 = gr.Button(i18n("Convert"))
+
+ with gr.Row():
+ vc_output1 = gr.Textbox(
+ label=i18n("Output Information"),
+ info=i18n("The output information will be displayed here."),
+ )
+ vc_output2 = gr.Audio(label=i18n("Export Audio"))
+
+ # Batch inference tab
+ with gr.Tab(i18n("Batch")):
+ with gr.Row():
+ with gr.Column():
+ input_folder_batch = gr.Textbox(
+ label=i18n("Input Folder"),
+ info=i18n("Select the folder containing the audios to convert."),
+ placeholder=i18n("Enter input path"),
+ value=os.path.join(now_dir, "assets", "audios"),
+ interactive=True,
+ )
+ output_folder_batch = gr.Textbox(
+ label=i18n("Output Folder"),
+ info=i18n(
+ "Select the folder where the output audios will be saved."
+ ),
+ placeholder=i18n("Enter output path"),
+ value=os.path.join(now_dir, "assets", "audios"),
+ interactive=True,
+ )
+ with gr.Accordion(i18n("Advanced Settings"), open=False):
+ with gr.Column():
+ clear_outputs_batch = gr.Button(
+ i18n("Clear Outputs (Deletes all audios in assets/audios)")
+ )
+ export_format_batch = gr.Radio(
+ label=i18n("Export Format"),
+ info=i18n("Select the format to export the audio."),
+ choices=["WAV", "MP3", "FLAC", "OGG", "M4A"],
+ value="WAV",
+ interactive=True,
+ )
+ split_audio_batch = gr.Checkbox(
+ label=i18n("Split Audio"),
+ info=i18n(
+ "Split the audio into chunks for inference to obtain better results in some cases."
+ ),
+ visible=True,
+ value=False,
+ interactive=True,
+ )
+ autotune_batch = gr.Checkbox(
+ label=i18n("Autotune"),
+ info=i18n(
+ "Apply a soft autotune to your inferences, recommended for singing conversions."
+ ),
+ visible=True,
+ value=False,
+ interactive=True,
+ )
+ clean_audio_batch = gr.Checkbox(
+ label=i18n("Clean Audio"),
+ info=i18n(
+ "Clean your audio output using noise detection algorithms, recommended for speaking audios."
+ ),
+ visible=True,
+ value=False,
+ interactive=True,
+ )
+ clean_strength_batch = gr.Slider(
+ minimum=0,
+ maximum=1,
+ label=i18n("Clean Strength"),
+ info=i18n(
+                        "Set the desired clean-up level; higher values clean more aggressively, but the audio may end up sounding more compressed."
+ ),
+ visible=False,
+ value=0.5,
+ interactive=True,
+ )
+ upscale_audio_batch = gr.Checkbox(
+ label=i18n("Upscale Audio"),
+ info=i18n(
+ "Upscale the audio to a higher quality, recommended for low-quality audios. (It could take longer to process the audio)"
+ ),
+ visible=True,
+ value=False,
+ interactive=True,
+ )
+ formant_shifting_batch = gr.Checkbox(
+ label=i18n("Formant Shifting"),
+ info=i18n(
+                        "Enable formant shifting. Used for male to female and vice-versa conversions."
+ ),
+ value=False,
+ visible=True,
+ interactive=True,
+ )
+ with gr.Row():
+ formant_preset_batch = gr.Dropdown(
+ label=i18n("Browse presets for formanting"),
+ info=i18n(
+ "Presets are located in /assets/formant_shift folder"
+ ),
+ choices=list_json_files(FORMANTSHIFT_DIR),
+ visible=False,
+ interactive=True,
+ )
+ formant_refresh_button_batch = gr.Button(
+ value="Refresh",
+ visible=False,
+ variant="primary",
+ )
+ formant_qfrency_batch = gr.Slider(
+ value=1.0,
+ info=i18n("Default value is 1.0"),
+ label=i18n("Quefrency for formant shifting"),
+ minimum=0.0,
+ maximum=16.0,
+ step=0.1,
+ visible=False,
+ interactive=True,
+ )
+ formant_timbre_batch = gr.Slider(
+ value=1.0,
+ info=i18n("Default value is 1.0"),
+ label=i18n("Timbre for formant shifting"),
+ minimum=0.0,
+ maximum=16.0,
+ step=0.1,
+ visible=False,
+ interactive=True,
+ )
+ post_process_batch = gr.Checkbox(
+ label=i18n("Post-Process"),
+ info=i18n("Post-process the audio to apply effects to the output."),
+ value=False,
+ interactive=True,
+ )
+ reverb_batch = gr.Checkbox(
+ label=i18n("Reverb"),
+ info=i18n("Apply reverb to the audio."),
+ value=False,
+ interactive=True,
+ visible=False,
+ )
+ reverb_room_size_batch = gr.Slider(
+ minimum=0,
+ maximum=1,
+ label=i18n("Reverb Room Size"),
+ info=i18n("Set the room size of the reverb."),
+ value=0.5,
+ interactive=True,
+ visible=False,
+ )
+
+ reverb_damping_batch = gr.Slider(
+ minimum=0,
+ maximum=1,
+ label=i18n("Reverb Damping"),
+ info=i18n("Set the damping of the reverb."),
+ value=0.5,
+ interactive=True,
+ visible=False,
+ )
+
+ reverb_wet_gain_batch = gr.Slider(
+ minimum=0,
+ maximum=1,
+ label=i18n("Reverb Wet Gain"),
+ info=i18n("Set the wet gain of the reverb."),
+ value=0.33,
+ interactive=True,
+ visible=False,
+ )
+
+ reverb_dry_gain_batch = gr.Slider(
+ minimum=0,
+ maximum=1,
+ label=i18n("Reverb Dry Gain"),
+ info=i18n("Set the dry gain of the reverb."),
+ value=0.4,
+ interactive=True,
+ visible=False,
+ )
+
+ reverb_width_batch = gr.Slider(
+ minimum=0,
+ maximum=1,
+ label=i18n("Reverb Width"),
+ info=i18n("Set the width of the reverb."),
+ value=1.0,
+ interactive=True,
+ visible=False,
+ )
+
+ reverb_freeze_mode_batch = gr.Slider(
+ minimum=0,
+ maximum=1,
+ label=i18n("Reverb Freeze Mode"),
+ info=i18n("Set the freeze mode of the reverb."),
+ value=0.0,
+ interactive=True,
+ visible=False,
+ )
+ pitch_shift_batch = gr.Checkbox(
+ label=i18n("Pitch Shift"),
+ info=i18n("Apply pitch shift to the audio."),
+ value=False,
+ interactive=True,
+ visible=False,
+ )
+ pitch_shift_semitones_batch = gr.Slider(
+ minimum=-12,
+ maximum=12,
+ label=i18n("Pitch Shift Semitones"),
+ info=i18n("Set the pitch shift semitones."),
+ value=0,
+ interactive=True,
+ visible=False,
+ )
+ limiter_batch = gr.Checkbox(
+ label=i18n("Limiter"),
+ info=i18n("Apply limiter to the audio."),
+ value=False,
+ interactive=True,
+ visible=False,
+ )
+ limiter_threshold_batch = gr.Slider(
+ minimum=-60,
+ maximum=0,
+ label=i18n("Limiter Threshold dB"),
+ info=i18n("Set the limiter threshold dB."),
+ value=-6,
+ interactive=True,
+ visible=False,
+ )
+
+ limiter_release_time_batch = gr.Slider(
+ minimum=0.01,
+ maximum=1,
+ label=i18n("Limiter Release Time"),
+ info=i18n("Set the limiter release time."),
+ value=0.05,
+ interactive=True,
+ visible=False,
+ )
+ gain_batch = gr.Checkbox(
+ label=i18n("Gain"),
+ info=i18n("Apply gain to the audio."),
+ value=False,
+ interactive=True,
+ visible=False,
+ )
+ gain_db_batch = gr.Slider(
+ minimum=-60,
+ maximum=60,
+ label=i18n("Gain dB"),
+ info=i18n("Set the gain dB."),
+ value=0,
+ interactive=True,
+ visible=False,
+ )
+ distortion_batch = gr.Checkbox(
+ label=i18n("Distortion"),
+ info=i18n("Apply distortion to the audio."),
+ value=False,
+ interactive=True,
+ visible=False,
+ )
+ distortion_gain_batch = gr.Slider(
+ minimum=-60,
+ maximum=60,
+ label=i18n("Distortion Gain"),
+ info=i18n("Set the distortion gain."),
+ value=25,
+ interactive=True,
+ visible=False,
+ )
+ chorus_batch = gr.Checkbox(
+                            label=i18n("Chorus"),
+ info=i18n("Apply chorus to the audio."),
+ value=False,
+ interactive=True,
+ visible=False,
+ )
+ chorus_rate_batch = gr.Slider(
+ minimum=0,
+ maximum=100,
+ label=i18n("Chorus Rate Hz"),
+ info=i18n("Set the chorus rate Hz."),
+ value=1.0,
+ interactive=True,
+ visible=False,
+ )
+
+ chorus_depth_batch = gr.Slider(
+ minimum=0,
+ maximum=1,
+                            label=i18n("Chorus Depth"),
+ info=i18n("Set the chorus depth."),
+ value=0.25,
+ interactive=True,
+ visible=False,
+ )
+
+ chorus_center_delay_batch = gr.Slider(
+ minimum=7,
+ maximum=8,
+                            label=i18n("Chorus Center Delay ms"),
+ info=i18n("Set the chorus center delay ms."),
+ value=7,
+ interactive=True,
+ visible=False,
+ )
+
+ chorus_feedback_batch = gr.Slider(
+ minimum=0,
+ maximum=1,
+                            label=i18n("Chorus Feedback"),
+ info=i18n("Set the chorus feedback."),
+ value=0.0,
+ interactive=True,
+ visible=False,
+ )
+
+ chorus_mix_batch = gr.Slider(
+ minimum=0,
+ maximum=1,
+ label=i18n("Chorus Mix"),
+ info=i18n("Set the chorus mix."),
+ value=0.5,
+ interactive=True,
+ visible=False,
+ )
+ bitcrush_batch = gr.Checkbox(
+ label=i18n("Bitcrush"),
+ info=i18n("Apply bitcrush to the audio."),
+ value=False,
+ interactive=True,
+ visible=False,
+ )
+ bitcrush_bit_depth_batch = gr.Slider(
+ minimum=1,
+ maximum=32,
+ label=i18n("Bitcrush Bit Depth"),
+ info=i18n("Set the bitcrush bit depth."),
+ value=8,
+ interactive=True,
+ visible=False,
+ )
+ clipping_batch = gr.Checkbox(
+ label=i18n("Clipping"),
+ info=i18n("Apply clipping to the audio."),
+ value=False,
+ interactive=True,
+ visible=False,
+ )
+ clipping_threshold_batch = gr.Slider(
+ minimum=-60,
+ maximum=0,
+ label=i18n("Clipping Threshold"),
+ info=i18n("Set the clipping threshold."),
+ value=-6,
+ interactive=True,
+ visible=False,
+ )
+ compressor_batch = gr.Checkbox(
+ label=i18n("Compressor"),
+ info=i18n("Apply compressor to the audio."),
+ value=False,
+ interactive=True,
+ visible=False,
+ )
+ compressor_threshold_batch = gr.Slider(
+ minimum=-60,
+ maximum=0,
+ label=i18n("Compressor Threshold dB"),
+ info=i18n("Set the compressor threshold dB."),
+ value=0,
+ interactive=True,
+ visible=False,
+ )
+
+ compressor_ratio_batch = gr.Slider(
+ minimum=1,
+ maximum=20,
+ label=i18n("Compressor Ratio"),
+ info=i18n("Set the compressor ratio."),
+ value=1,
+ interactive=True,
+ visible=False,
+ )
+
+ compressor_attack_batch = gr.Slider(
+ minimum=0.0,
+ maximum=100,
+ label=i18n("Compressor Attack ms"),
+ info=i18n("Set the compressor attack ms."),
+ value=1.0,
+ interactive=True,
+ visible=False,
+ )
+
+ compressor_release_batch = gr.Slider(
+ minimum=0.01,
+ maximum=100,
+ label=i18n("Compressor Release ms"),
+ info=i18n("Set the compressor release ms."),
+ value=100,
+ interactive=True,
+ visible=False,
+ )
+ delay_batch = gr.Checkbox(
+ label=i18n("Delay"),
+ info=i18n("Apply delay to the audio."),
+ value=False,
+ interactive=True,
+ visible=False,
+ )
+ delay_seconds_batch = gr.Slider(
+ minimum=0.0,
+ maximum=5.0,
+ label=i18n("Delay Seconds"),
+ info=i18n("Set the delay seconds."),
+ value=0.5,
+ interactive=True,
+ visible=False,
+ )
+
+ delay_feedback_batch = gr.Slider(
+ minimum=0.0,
+ maximum=1.0,
+ label=i18n("Delay Feedback"),
+ info=i18n("Set the delay feedback."),
+ value=0.0,
+ interactive=True,
+ visible=False,
+ )
+
+ delay_mix_batch = gr.Slider(
+ minimum=0.0,
+ maximum=1.0,
+ label=i18n("Delay Mix"),
+ info=i18n("Set the delay mix."),
+ value=0.5,
+ interactive=True,
+ visible=False,
+ )
+ with gr.Accordion(i18n("Preset Settings"), open=False):
+ with gr.Row():
+ preset_dropdown = gr.Dropdown(
+ label=i18n("Select Custom Preset"),
+ interactive=True,
+ )
+ presets_batch_refresh_button = gr.Button(
+ i18n("Refresh Presets")
+ )
+ import_file = gr.File(
+ label=i18n("Select file to import"),
+ file_count="single",
+ type="filepath",
+ interactive=True,
+ )
+ import_file.change(
+ import_presets_button,
+ inputs=import_file,
+ outputs=[preset_dropdown],
+ )
+ presets_batch_refresh_button.click(
+ refresh_presets, outputs=preset_dropdown
+ )
+ with gr.Row():
+ preset_name_input = gr.Textbox(
+ label=i18n("Preset Name"),
+ placeholder=i18n("Enter preset name"),
+ )
+ export_button = gr.Button(i18n("Export Preset"))
+ pitch_batch = gr.Slider(
+ minimum=-24,
+ maximum=24,
+ step=1,
+ label=i18n("Pitch"),
+ info=i18n(
+                            "Set the pitch of the audio; the higher the value, the higher the pitch."
+ ),
+ value=0,
+ interactive=True,
+ )
+ filter_radius_batch = gr.Slider(
+ minimum=0,
+ maximum=7,
+ label=i18n("Filter Radius"),
+ info=i18n(
+                            "If the value is three or greater, median filtering is applied to the extracted pitch results, which can reduce breathiness."
+ ),
+ value=3,
+ step=1,
+ interactive=True,
+ )
+ index_rate_batch = gr.Slider(
+ minimum=0,
+ maximum=1,
+ label=i18n("Search Feature Ratio"),
+ info=i18n(
+ "Influence exerted by the index file; a higher value corresponds to greater influence. However, opting for lower values can help mitigate artifacts present in the audio."
+ ),
+ value=0.75,
+ interactive=True,
+ )
+ rms_mix_rate_batch = gr.Slider(
+ minimum=0,
+ maximum=1,
+ label=i18n("Volume Envelope"),
+ info=i18n(
+ "Substitute or blend with the volume envelope of the output. The closer the ratio is to 1, the more the output envelope is employed."
+ ),
+ value=1,
+ interactive=True,
+ )
+ protect_batch = gr.Slider(
+ minimum=0,
+ maximum=0.5,
+ label=i18n("Protect Voiceless Consonants"),
+ info=i18n(
+                                "Safeguard distinct consonants and breathing sounds to prevent electro-acoustic tearing and other artifacts. The maximum value of 0.5 offers full protection; lower values reduce protection but may also lessen the indexing effect."
+ ),
+ value=0.5,
+ interactive=True,
+ )
+ preset_dropdown.change(
+ update_sliders,
+ inputs=preset_dropdown,
+ outputs=[
+ pitch_batch,
+ filter_radius_batch,
+ index_rate_batch,
+ rms_mix_rate_batch,
+ protect_batch,
+ ],
+ )
+ export_button.click(
+ export_presets_button,
+ inputs=[
+ preset_name_input,
+                            pitch_batch,
+                            filter_radius_batch,
+                            index_rate_batch,
+                            rms_mix_rate_batch,
+                            protect_batch,
+ ],
+ outputs=[],
+ )
+ hop_length_batch = gr.Slider(
+ minimum=1,
+ maximum=512,
+ step=1,
+ label=i18n("Hop Length"),
+ info=i18n(
+ "Denotes the duration it takes for the system to transition to a significant pitch change. Smaller hop lengths require more time for inference but tend to yield higher pitch accuracy."
+ ),
+ visible=False,
+ value=128,
+ interactive=True,
+ )
+ f0_method_batch = gr.Radio(
+ label=i18n("Pitch extraction algorithm"),
+ info=i18n(
+ "Pitch extraction algorithm to use for the audio conversion. The default algorithm is rmvpe, which is recommended for most cases."
+ ),
+ choices=[
+ "crepe",
+ "crepe-tiny",
+ "rmvpe",
+ "fcpe",
+ "hybrid[rmvpe+fcpe]",
+ ],
+ value="rmvpe",
+ interactive=True,
+ )
+ embedder_model_batch = gr.Radio(
+ label=i18n("Embedder Model"),
+ info=i18n("Model used for learning speaker embedding."),
+ choices=[
+ "contentvec",
+ "chinese-hubert-base",
+ "japanese-hubert-base",
+ "korean-hubert-base",
+ "custom",
+ ],
+ value="contentvec",
+ interactive=True,
+ )
+ f0_file_batch = gr.File(
+ label=i18n(
+ "The f0 curve represents the variations in the base frequency of a voice over time, showing how pitch rises and falls."
+ ),
+ visible=True,
+ )
+ with gr.Column(visible=False) as embedder_custom_batch:
+ with gr.Accordion(i18n("Custom Embedder"), open=True):
+ with gr.Row():
+ embedder_model_custom_batch = gr.Dropdown(
+ label=i18n("Select Custom Embedder"),
+ choices=refresh_embedders_folders(),
+ interactive=True,
+ allow_custom_value=True,
+ )
+ refresh_embedders_button_batch = gr.Button(
+ i18n("Refresh embedders")
+ )
+ folder_name_input_batch = gr.Textbox(
+ label=i18n("Folder Name"), interactive=True
+ )
+ with gr.Row():
+ bin_file_upload_batch = gr.File(
+ label=i18n("Upload .bin"),
+ type="filepath",
+ interactive=True,
+ )
+ config_file_upload_batch = gr.File(
+ label=i18n("Upload .json"),
+ type="filepath",
+ interactive=True,
+ )
+ move_files_button_batch = gr.Button(
+ i18n("Move files to custom embedder folder")
+ )
+
+ convert_button2 = gr.Button(i18n("Convert"))
+ stop_button = gr.Button(i18n("Stop convert"), visible=False)
+ stop_button.click(fn=stop_infer, inputs=[], outputs=[])
+
+ with gr.Row():
+ vc_output3 = gr.Textbox(
+ label=i18n("Output Information"),
+ info=i18n("The output information will be displayed here."),
+ )
+
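+            # Visibility helpers: Gradio event handlers may return plain dicts of the
+            # form {"visible": ..., "__type__": "update"} to update a component in place.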
+ def toggle_visible(checkbox):
+ return {"visible": checkbox, "__type__": "update"}
+
+ def toggle_visible_hop_length(f0_method):
+                return {
+                    "visible": f0_method in ("crepe", "crepe-tiny"),
+                    "__type__": "update",
+                }
+
+ def toggle_visible_embedder_custom(embedder_model):
+                return {"visible": embedder_model == "custom", "__type__": "update"}
+
+ def enable_stop_convert_button():
+ return {"visible": False, "__type__": "update"}, {
+ "visible": True,
+ "__type__": "update",
+ }
+
+ def disable_stop_convert_button():
+ return {"visible": True, "__type__": "update"}, {
+ "visible": False,
+ "__type__": "update",
+ }
+
+ def toggle_visible_formant_shifting(checkbox):
+                return tuple(gr.update(visible=checkbox) for _ in range(4))
+
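+            # Fan-out helper: one checkbox shows or hides `count` dependent components;
+            # e.g. the Post-Process checkbox reveals the 11 effect toggles below it.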
+ def update_visibility(checkbox, count):
+ return [gr.update(visible=checkbox) for _ in range(count)]
+
+ def post_process_visible(checkbox):
+ return update_visibility(checkbox, 11)
+
+ def reverb_visible(checkbox):
+ return update_visibility(checkbox, 6)
+
+ def limiter_visible(checkbox):
+ return update_visibility(checkbox, 2)
+
+ def chorus_visible(checkbox):
+ return update_visibility(checkbox, 6)
+
+ def bitcrush_visible(checkbox):
+ return update_visibility(checkbox, 1)
+
+ def compress_visible(checkbox):
+ return update_visibility(checkbox, 4)
+
+ def delay_visible(checkbox):
+ return update_visibility(checkbox, 3)
+
+ clean_audio.change(
+ fn=toggle_visible,
+ inputs=[clean_audio],
+ outputs=[clean_strength],
+ )
+ formant_shifting.change(
+ fn=toggle_visible_formant_shifting,
+ inputs=[formant_shifting],
+ outputs=[
+ formant_preset,
+ formant_refresh_button,
+ formant_qfrency,
+ formant_timbre,
+ ],
+ )
+ formant_shifting_batch.change(
+ fn=toggle_visible_formant_shifting,
+                inputs=[formant_shifting_batch],
+ outputs=[
+ formant_preset_batch,
+ formant_refresh_button_batch,
+ formant_qfrency_batch,
+ formant_timbre_batch,
+ ],
+ )
+ formant_refresh_button.click(
+ fn=refresh_formant,
+ inputs=[],
+ outputs=[formant_preset],
+ )
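+            # The batch refresh button is not wired up anywhere else in this diff;
+            # register the same handler for it (assumed to be the intended behavior).
+            formant_refresh_button_batch.click(
+                fn=refresh_formant,
+                inputs=[],
+                outputs=[formant_preset_batch],
+            )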
+ formant_preset.change(
+ fn=update_sliders_formant,
+ inputs=[formant_preset],
+ outputs=[
+ formant_qfrency,
+ formant_timbre,
+ ],
+ )
+ formant_preset_batch.change(
+ fn=update_sliders_formant,
+ inputs=[formant_preset_batch],
+ outputs=[
+                    formant_qfrency_batch,
+                    formant_timbre_batch,
+ ],
+ )
+ post_process.change(
+ fn=post_process_visible,
+ inputs=[post_process],
+ outputs=[
+ reverb,
+ pitch_shift,
+ limiter,
+ gain,
+ distortion,
+ chorus,
+ bitcrush,
+ clipping,
+ compressor,
+ delay,
+ clean_audio,
+ ],
+ )
+
+ reverb.change(
+ fn=reverb_visible,
+ inputs=[reverb],
+ outputs=[
+ reverb_room_size,
+ reverb_damping,
+ reverb_wet_gain,
+ reverb_dry_gain,
+ reverb_width,
+ reverb_freeze_mode,
+ ],
+ )
+ pitch_shift.change(
+ fn=toggle_visible,
+ inputs=[pitch_shift],
+ outputs=[pitch_shift_semitones],
+ )
+ limiter.change(
+ fn=limiter_visible,
+ inputs=[limiter],
+ outputs=[limiter_threshold, limiter_release_time],
+ )
+ gain.change(
+ fn=toggle_visible,
+ inputs=[gain],
+ outputs=[gain_db],
+ )
+ distortion.change(
+ fn=toggle_visible,
+ inputs=[distortion],
+ outputs=[distortion_gain],
+ )
+ chorus.change(
+ fn=chorus_visible,
+ inputs=[chorus],
+ outputs=[
+ chorus_rate,
+ chorus_depth,
+ chorus_center_delay,
+ chorus_feedback,
+ chorus_mix,
+ ],
+ )
+ bitcrush.change(
+ fn=bitcrush_visible,
+ inputs=[bitcrush],
+ outputs=[bitcrush_bit_depth],
+ )
+ clipping.change(
+ fn=toggle_visible,
+ inputs=[clipping],
+ outputs=[clipping_threshold],
+ )
+ compressor.change(
+ fn=compress_visible,
+ inputs=[compressor],
+ outputs=[
+ compressor_threshold,
+ compressor_ratio,
+ compressor_attack,
+ compressor_release,
+ ],
+ )
+ delay.change(
+ fn=delay_visible,
+ inputs=[delay],
+ outputs=[delay_seconds, delay_feedback, delay_mix],
+ )
+ post_process_batch.change(
+ fn=post_process_visible,
+ inputs=[post_process_batch],
+ outputs=[
+ reverb_batch,
+ pitch_shift_batch,
+ limiter_batch,
+ gain_batch,
+ distortion_batch,
+ chorus_batch,
+ bitcrush_batch,
+ clipping_batch,
+ compressor_batch,
+ delay_batch,
+ clean_audio_batch,
+ ],
+ )
+
+ reverb_batch.change(
+ fn=reverb_visible,
+ inputs=[reverb_batch],
+ outputs=[
+ reverb_room_size_batch,
+ reverb_damping_batch,
+ reverb_wet_gain_batch,
+ reverb_dry_gain_batch,
+ reverb_width_batch,
+ reverb_freeze_mode_batch,
+ ],
+ )
+ pitch_shift_batch.change(
+ fn=toggle_visible,
+ inputs=[pitch_shift_batch],
+ outputs=[pitch_shift_semitones_batch],
+ )
+ limiter_batch.change(
+ fn=limiter_visible,
+ inputs=[limiter_batch],
+ outputs=[limiter_threshold_batch, limiter_release_time_batch],
+ )
+ gain_batch.change(
+ fn=toggle_visible,
+ inputs=[gain_batch],
+ outputs=[gain_db_batch],
+ )
+ distortion_batch.change(
+ fn=toggle_visible,
+ inputs=[distortion_batch],
+ outputs=[distortion_gain_batch],
+ )
+ chorus_batch.change(
+ fn=chorus_visible,
+ inputs=[chorus_batch],
+ outputs=[
+ chorus_rate_batch,
+ chorus_depth_batch,
+ chorus_center_delay_batch,
+ chorus_feedback_batch,
+ chorus_mix_batch,
+ ],
+ )
+ bitcrush_batch.change(
+ fn=bitcrush_visible,
+ inputs=[bitcrush_batch],
+ outputs=[bitcrush_bit_depth_batch],
+ )
+ clipping_batch.change(
+ fn=toggle_visible,
+ inputs=[clipping_batch],
+ outputs=[clipping_threshold_batch],
+ )
+ compressor_batch.change(
+ fn=compress_visible,
+ inputs=[compressor_batch],
+ outputs=[
+ compressor_threshold_batch,
+ compressor_ratio_batch,
+ compressor_attack_batch,
+ compressor_release_batch,
+ ],
+ )
+ delay_batch.change(
+ fn=delay_visible,
+ inputs=[delay_batch],
+ outputs=[delay_seconds_batch, delay_feedback_batch, delay_mix_batch],
+ )
+ clean_audio_batch.change(
+ fn=toggle_visible,
+ inputs=[clean_audio_batch],
+ outputs=[clean_strength_batch],
+ )
+ f0_method.change(
+ fn=toggle_visible_hop_length,
+ inputs=[f0_method],
+ outputs=[hop_length],
+ )
+ f0_method_batch.change(
+ fn=toggle_visible_hop_length,
+ inputs=[f0_method_batch],
+ outputs=[hop_length_batch],
+ )
+ refresh_button.click(
+ fn=change_choices,
+ inputs=[],
+ outputs=[
+ model_file,
+ index_file,
+ audio,
+ ],
+ )
+ audio.change(
+ fn=output_path_fn,
+ inputs=[audio],
+ outputs=[output_path],
+ )
+ upload_audio.upload(
+ fn=save_to_wav2,
+ inputs=[upload_audio],
+ outputs=[audio, output_path],
+ )
+ upload_audio.stop_recording(
+ fn=save_to_wav,
+ inputs=[upload_audio],
+ outputs=[audio, output_path],
+ )
+ clear_outputs_infer.click(
+ fn=delete_outputs,
+ inputs=[],
+ outputs=[],
+ )
+ clear_outputs_batch.click(
+ fn=delete_outputs,
+ inputs=[],
+ outputs=[],
+ )
+ embedder_model.change(
+ fn=toggle_visible_embedder_custom,
+ inputs=[embedder_model],
+ outputs=[embedder_custom],
+ )
+ embedder_model_batch.change(
+ fn=toggle_visible_embedder_custom,
+ inputs=[embedder_model_batch],
+ outputs=[embedder_custom_batch],
+ )
+ move_files_button.click(
+ fn=create_folder_and_move_files,
+ inputs=[folder_name_input, bin_file_upload, config_file_upload],
+ outputs=[],
+ )
+ refresh_embedders_button.click(
+ fn=lambda: gr.update(choices=refresh_embedders_folders()),
+ inputs=[],
+ outputs=[embedder_model_custom],
+ )
+ move_files_button_batch.click(
+ fn=create_folder_and_move_files,
+ inputs=[
+ folder_name_input_batch,
+ bin_file_upload_batch,
+ config_file_upload_batch,
+ ],
+ outputs=[],
+ )
+ refresh_embedders_button_batch.click(
+ fn=lambda: gr.update(choices=refresh_embedders_folders()),
+ inputs=[],
+ outputs=[embedder_model_custom_batch],
+ )
+            # Slider variables, grouped per effect
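+            # The flattened order of `sliders` below must match the trailing
+            # parameters expected by run_infer_script.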
+ reverb_sliders = [
+ reverb_room_size,
+ reverb_damping,
+ reverb_wet_gain,
+ reverb_dry_gain,
+ reverb_width,
+ reverb_freeze_mode,
+ ]
+ pitch_shift_sliders = [pitch_shift_semitones]
+ limiter_sliders = [limiter_threshold, limiter_release_time]
+ gain_sliders = [gain_db]
+ distortion_sliders = [distortion_gain]
+ chorus_sliders = [
+ chorus_rate,
+ chorus_depth,
+ chorus_center_delay,
+ chorus_feedback,
+ chorus_mix,
+ ]
+ bitcrush_sliders = [bitcrush_bit_depth]
+ clipping_sliders = [clipping_threshold]
+ compressor_sliders = [
+ compressor_threshold,
+ compressor_ratio,
+ compressor_attack,
+ compressor_release,
+ ]
+ delay_sliders = [delay_seconds, delay_feedback, delay_mix]
+ sliders = [
+ *reverb_sliders,
+ *pitch_shift_sliders,
+ *limiter_sliders,
+ *gain_sliders,
+ *distortion_sliders,
+ *chorus_sliders,
+ *bitcrush_sliders,
+ *clipping_sliders,
+ *compressor_sliders,
+ *delay_sliders,
+ ]
+ convert_button1.click(
+ fn=run_infer_script,
+ inputs=[
+ pitch,
+ filter_radius,
+ index_rate,
+ rms_mix_rate,
+ protect,
+ hop_length,
+ f0_method,
+ audio,
+ output_path,
+ model_file,
+ index_file,
+ split_audio,
+ autotune,
+ clean_audio,
+ clean_strength,
+ export_format,
+ upscale_audio,
+ f0_file,
+ embedder_model,
+ embedder_model_custom,
+ formant_shifting,
+ formant_qfrency,
+ formant_timbre,
+ post_process,
+ reverb,
+ pitch_shift,
+ limiter,
+ gain,
+ distortion,
+ chorus,
+ bitcrush,
+ clipping,
+ compressor,
+ delay,
+ *sliders,
+ ],
+ outputs=[vc_output1, vc_output2],
+ )
+            # Batch slider variables, grouped per effect
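+            # Same ordering contract as above, for run_batch_infer_script.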
+ reverb_sliders_batch = [
+ reverb_room_size_batch,
+ reverb_damping_batch,
+ reverb_wet_gain_batch,
+ reverb_dry_gain_batch,
+ reverb_width_batch,
+ reverb_freeze_mode_batch,
+ ]
+ pitch_shift_sliders_batch = [pitch_shift_semitones_batch]
+ limiter_sliders_batch = [limiter_threshold_batch, limiter_release_time_batch]
+ gain_sliders_batch = [gain_db_batch]
+ distortion_sliders_batch = [distortion_gain_batch]
+ chorus_sliders_batch = [
+ chorus_rate_batch,
+ chorus_depth_batch,
+ chorus_center_delay_batch,
+ chorus_feedback_batch,
+ chorus_mix_batch,
+ ]
+ bitcrush_sliders_batch = [bitcrush_bit_depth_batch]
+ clipping_sliders_batch = [clipping_threshold_batch]
+ compressor_sliders_batch = [
+ compressor_threshold_batch,
+ compressor_ratio_batch,
+ compressor_attack_batch,
+ compressor_release_batch,
+ ]
+ delay_sliders_batch = [delay_seconds_batch, delay_feedback_batch, delay_mix_batch]
+ sliders_batch = [
+ *reverb_sliders_batch,
+ *pitch_shift_sliders_batch,
+ *limiter_sliders_batch,
+ *gain_sliders_batch,
+ *distortion_sliders_batch,
+ *chorus_sliders_batch,
+ *bitcrush_sliders_batch,
+ *clipping_sliders_batch,
+ *compressor_sliders_batch,
+ *delay_sliders_batch,
+ ]
+ convert_button2.click(
+ fn=run_batch_infer_script,
+ inputs=[
+ pitch_batch,
+ filter_radius_batch,
+ index_rate_batch,
+ rms_mix_rate_batch,
+ protect_batch,
+ hop_length_batch,
+ f0_method_batch,
+ input_folder_batch,
+ output_folder_batch,
+ model_file,
+ index_file,
+ split_audio_batch,
+ autotune_batch,
+ clean_audio_batch,
+ clean_strength_batch,
+ export_format_batch,
+ upscale_audio_batch,
+ f0_file_batch,
+ embedder_model_batch,
+ embedder_model_custom_batch,
+ formant_shifting_batch,
+ formant_qfrency_batch,
+ formant_timbre_batch,
+ post_process_batch,
+ reverb_batch,
+ pitch_shift_batch,
+ limiter_batch,
+ gain_batch,
+ distortion_batch,
+ chorus_batch,
+ bitcrush_batch,
+ clipping_batch,
+ compressor_batch,
+ delay_batch,
+ *sliders_batch,
+ ],
+ outputs=[vc_output3],
+ )
+ convert_button2.click(
+ fn=enable_stop_convert_button,
+ inputs=[],
+ outputs=[convert_button2, stop_button],
+ )
+ stop_button.click(
+ fn=disable_stop_convert_button,
+ inputs=[],
+ outputs=[convert_button2, stop_button],
+ )
diff --git a/tabs/plugins/plugins.py b/tabs/plugins/plugins.py
new file mode 100644
index 0000000000000000000000000000000000000000..afa1d5c92a7fda40405bb219307751ed8f2fc45b
--- /dev/null
+++ b/tabs/plugins/plugins.py
@@ -0,0 +1,34 @@
+import os, sys
+import gradio as gr
+import importlib.util
+import tabs.plugins.plugins_core as plugins_core
+
+from assets.i18n.i18n import I18nAuto
+
+i18n = I18nAuto()
+
+now_dir = os.getcwd()
+sys.path.append(now_dir)
+
+plugins_core.check_new_folders()
+
+
+def plugins_tab():
+ with gr.TabItem(i18n("Plugin Installer")):
+ dropbox = gr.File(
+ label=i18n("Drag your plugin.zip to install it"),
+ type="filepath",
+ )
+
+ dropbox.upload(
+ fn=plugins_core.save_plugin_dropbox,
+ inputs=[dropbox],
+ outputs=[dropbox],
+ )
+
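+    # Each folder under tabs/plugins/installed is treated as a package whose
+    # plugin module exposes applio_plugin(); it is imported and rendered in its own tab.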
+ for plugin in os.listdir(os.path.join(now_dir, "tabs", "plugins", "installed")):
+ plugin_main = f"tabs.plugins.installed.{plugin}.plugin"
+ plugin_import = importlib.import_module(plugin_main)
+
+ with gr.TabItem(plugin):
+ plugin_import.applio_plugin()
diff --git a/tabs/plugins/plugins_core.py b/tabs/plugins/plugins_core.py
new file mode 100644
index 0000000000000000000000000000000000000000..fa9ae2375a6e3add320588829222327103a9a9a4
--- /dev/null
+++ b/tabs/plugins/plugins_core.py
@@ -0,0 +1,134 @@
+import os, sys, shutil
+import json
+import gradio as gr
+import zipfile
+import subprocess
+
+from assets.i18n.i18n import I18nAuto
+
+i18n = I18nAuto()
+
+now_dir = os.getcwd()
+sys.path.append(now_dir)
+
+from tabs.settings.restart import restart_applio
+
+plugins_path = os.path.join(now_dir, "tabs", "plugins", "installed")
+if not os.path.exists(plugins_path):
+ os.makedirs(plugins_path)
+json_file_path = os.path.join(now_dir, "assets", "config.json")
+current_folders = os.listdir(plugins_path)
+
+
+def get_existing_folders():
+ if os.path.exists(json_file_path):
+ with open(json_file_path, "r") as file:
+ config = json.load(file)
+        return config.get("plugins", [])
+ else:
+ return []
+
+
+def save_existing_folders(existing_folders):
+ with open(json_file_path, "r") as file:
+ config = json.load(file)
+ config["plugins"] = existing_folders
+ with open(json_file_path, "w") as file:
+ json.dump(config, file, indent=2)
+
+
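+# Install a plugin from a dropped .zip: move it into the installed folder, extract it,
+# install its requirements.txt (if present), register it in config.json, and restart Applio.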
+def save_plugin_dropbox(dropbox):
+    if not dropbox.endswith(".zip"):
+ raise gr.Error(
+ message="The file you dropped is not a valid plugin.zip. Please try again."
+ )
+ else:
+ file_name = os.path.basename(dropbox)
+ folder_name = file_name.split(".zip")[0]
+ folder_path = os.path.join(plugins_path, folder_name)
+ zip_file_path = os.path.join(plugins_path, file_name)
+
+        if os.path.exists(folder_path):
+            shutil.rmtree(folder_path)
+
+        shutil.move(dropbox, zip_file_path)
+ print("Proceeding with the extraction...")
+
+ with zipfile.ZipFile(zip_file_path, "r") as zip_ref:
+ zip_ref.extractall(plugins_path)
+ os.remove(zip_file_path)
+
+ if os.path.exists(os.path.join(folder_path, "requirements.txt")):
+ if os.name == "nt":
+ subprocess.run(
+ [
+ os.path.join("env", "python.exe"),
+ "-m",
+ "pip",
+ "install",
+ "-r",
+ os.path.join(folder_path, "requirements.txt"),
+ ]
+ )
+ else:
+ subprocess.run(
+ [
+ "python",
+ "-m",
+ "pip",
+ "install",
+ "-r",
+ os.path.join(folder_path, "requirements.txt"),
+ ]
+ )
+ else:
+ print("No requirements.txt file found in the plugin folder.")
+
+ save_existing_folders(get_existing_folders() + [folder_name])
+
+ print(
+        f"{folder_name} plugin installed in {plugins_path}! Restarting Applio to apply the changes."
+ )
+ gr.Info(
+        f"{folder_name} plugin installed in {plugins_path}! Restarting Applio to apply the changes."
+ )
+ restart_applio()
+ return None
+
+
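+# Detect plugin folders added manually (outside the installer UI), install their
+# requirements, and restart Applio so the new tabs are picked up.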
+def check_new_folders():
+ existing_folders = get_existing_folders()
+ new_folders = set(current_folders) - set(existing_folders)
+ save_existing_folders(current_folders)
+ if new_folders:
+ for new_folder in new_folders:
+ complete_path = os.path.join(plugins_path, new_folder)
+ print(f"New plugin {new_folder} found, installing it...")
+
+ if os.path.exists(os.path.join(complete_path, "requirements.txt")):
+ if os.name == "nt":
+ subprocess.run(
+ [
+ os.path.join("env", "python.exe"),
+ "-m",
+ "pip",
+ "install",
+ "-r",
+ os.path.join(complete_path, "requirements.txt"),
+ ]
+ )
+ else:
+ subprocess.run(
+ [
+ "python",
+ "-m",
+ "pip",
+ "install",
+ "-r",
+ os.path.join(complete_path, "requirements.txt"),
+ ]
+ )
+ else:
+ print("No requirements.txt file found in the plugin folder.")
+        print("Plugins checked and installed! Restarting Applio to apply the changes.")
+ restart_applio()
diff --git a/tabs/report/main.js b/tabs/report/main.js
new file mode 100644
index 0000000000000000000000000000000000000000..755cb9ab442c247cab0ab647e1599481bff491aa
--- /dev/null
+++ b/tabs/report/main.js
@@ -0,0 +1,74 @@
+// main.js
+if (!ScreenCastRecorder.isSupportedBrowser()) {
+ console.error("Screen Recording not supported in this browser");
+}
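+// Wires start/stop handlers around ScreenCastRecorder (defined in recorder.js) and
+// exposes them on window for record_button.js.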
+let recorder;
+let outputBlob;
+const stopRecording = () => __awaiter(void 0, void 0, void 0, function* () {
+ let currentState = "RECORDING";
+    // Do nothing if the user tries to stop recording before it has started
+ if (currentState === "OFF" || recorder == null) {
+ return;
+ }
+ // if (currentState === "COUNTDOWN") {
+ // this.setState({
+ // currentState: "OFF",
+ // })
+ // }
+ if (currentState === "RECORDING") {
+ if (recorder.getState() === "inactive") {
+ // this.setState({
+ // currentState: "OFF",
+ // })
+ console.log("Inactive");
+ }
+ else {
+ outputBlob = yield recorder.stop();
+ console.log("Done recording");
+ // this.setState({
+ // outputBlob,
+ // currentState: "PREVIEW_FILE",
+ // })
+ window.currentState = "PREVIEW_FILE";
+ const videoSource = URL.createObjectURL(outputBlob);
+ window.videoSource = videoSource;
+ const fileName = "recording";
+ const link = document.createElement("a");
+ link.setAttribute("href", videoSource);
+ link.setAttribute("download", `${fileName}.webm`);
+ link.click();
+ }
+ }
+});
+const startRecording = () => __awaiter(void 0, void 0, void 0, function* () {
+ const recordAudio = false;
+ recorder = new ScreenCastRecorder({
+ recordAudio,
+ onErrorOrStop: () => stopRecording(),
+ });
+ try {
+ yield recorder.initialize();
+ }
+ catch (e) {
+ console.warn(`ScreenCastRecorder.initialize error: ${e}`);
+ // this.setState({ currentState: "UNSUPPORTED" })
+ window.currentState = "UNSUPPORTED";
+ return;
+ }
+ // this.setState({ currentState: "COUNTDOWN" })
+ const hasStarted = recorder.start();
+ if (hasStarted) {
+ // this.setState({
+ // currentState: "RECORDING",
+ // })
+ console.log("Started recording");
+ window.currentState = "RECORDING";
+ }
+ else {
+ stopRecording().catch(err => console.warn(`withScreencast.stopRecording threw an error: ${err}`));
+ }
+});
+
+// Set global functions to window.
+window.startRecording = startRecording;
+window.stopRecording = stopRecording;
\ No newline at end of file
diff --git a/tabs/report/record_button.js b/tabs/report/record_button.js
new file mode 100644
index 0000000000000000000000000000000000000000..aa4fbf33fdaee2635cefc931ef0a786d5b06824a
--- /dev/null
+++ b/tabs/report/record_button.js
@@ -0,0 +1,40 @@
+// Setup if needed and start recording.
+async () => {
+ // Set up recording functions if not already initialized
+ if (!window.startRecording) {
+ let recorder_js = null;
+ let main_js = null;
+ }
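+  // Note: report.py substitutes the full contents of recorder.js and main.js for the
+  // two placeholder declarations above before handing this script to Gradio.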
+
+  // Fetch the recorded video blob and convert it to base64 (a Promise wraps the FileReader callbacks)
+ async function getVideoBlobAsBase64(objectURL) {
+ const response = await fetch(objectURL);
+ if (!response.ok) {
+ throw new Error('Failed to fetch video blob.');
+ }
+
+ const blob = await response.blob();
+
+ const reader = new FileReader();
+ reader.readAsDataURL(blob);
+
+ return new Promise((resolve, reject) => {
+ reader.onloadend = () => {
+ if (reader.result) {
+ resolve(reader.result.split(',')[1]); // Return the base64 string (without data URI prefix)
+ } else {
+ reject('Failed to convert blob to base64.');
+ }
+ };
+ });
+ }
+
+ if (window.currentState === "RECORDING") {
+ await window.stopRecording();
+ const base64String = await getVideoBlobAsBase64(window.videoSource);
+ return base64String;
+ } else {
+ window.startRecording();
+ return "Record";
+ }
+}
diff --git a/tabs/report/recorder.js b/tabs/report/recorder.js
new file mode 100644
index 0000000000000000000000000000000000000000..d054437c04bacb705425f9cd7c6783e3895fade1
--- /dev/null
+++ b/tabs/report/recorder.js
@@ -0,0 +1,112 @@
+// recorder.js
+var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
+ function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
+ return new (P || (P = Promise))(function (resolve, reject) {
+ function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
+ function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
+ function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
+ step((generator = generator.apply(thisArg, _arguments || [])).next());
+ });
+};
+const BLOB_TYPE = "video/webm";
+class ScreenCastRecorder {
+ /** True if the current browser likely supports screencasts. */
+ static isSupportedBrowser() {
+ return (navigator.mediaDevices != null &&
+ navigator.mediaDevices.getUserMedia != null &&
+ navigator.mediaDevices.getDisplayMedia != null &&
+ MediaRecorder.isTypeSupported(BLOB_TYPE));
+ }
+ constructor({ recordAudio, onErrorOrStop }) {
+ this.recordAudio = recordAudio;
+ this.onErrorOrStopCallback = onErrorOrStop;
+ this.inputStream = null;
+ this.recordedChunks = [];
+ this.mediaRecorder = null;
+ }
+ /**
+ * This asynchronous method will initialize the screen recording object asking
+ * for permissions to the user which are needed to start recording.
+ */
+ initialize() {
+ return __awaiter(this, void 0, void 0, function* () {
+ const desktopStream = yield navigator.mediaDevices.getDisplayMedia({
+ video: true,
+ });
+ let tracks = desktopStream.getTracks();
+ if (this.recordAudio) {
+ const voiceStream = yield navigator.mediaDevices.getUserMedia({
+ video: false,
+ audio: true,
+ });
+ tracks = tracks.concat(voiceStream.getAudioTracks());
+ }
+ this.recordedChunks = [];
+ this.inputStream = new MediaStream(tracks);
+ this.mediaRecorder = new MediaRecorder(this.inputStream, {
+ mimeType: BLOB_TYPE,
+ });
+ this.mediaRecorder.ondataavailable = e => this.recordedChunks.push(e.data);
+ });
+ }
+ getState() {
+ if (this.mediaRecorder) {
+ return this.mediaRecorder.state;
+ }
+ return "inactive";
+ }
+ /**
+ * This method will start the screen recording if the user has granted permissions
+ * and the mediaRecorder has been initialized
+ *
+ * @returns {boolean}
+ */
+ start() {
+ if (!this.mediaRecorder) {
+ console.warn(`ScreenCastRecorder.start: mediaRecorder is null`);
+ return false;
+ }
+ const logRecorderError = (e) => {
+ console.warn(`mediaRecorder.start threw an error: ${e}`);
+ };
+ this.mediaRecorder.onerror = (e) => {
+ logRecorderError(e);
+ this.onErrorOrStopCallback();
+ };
+ this.mediaRecorder.onstop = () => this.onErrorOrStopCallback();
+ try {
+ this.mediaRecorder.start();
+ }
+ catch (e) {
+ logRecorderError(e);
+ return false;
+ }
+ return true;
+ }
+ /**
+ * This method will stop recording and then return the generated Blob
+ *
+ * @returns {(Promise|undefined)}
+ * A Promise which will return the generated Blob
+ * Undefined if the MediaRecorder could not initialize
+ */
+ stop() {
+ if (!this.mediaRecorder) {
+ return undefined;
+ }
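+        // Capture the promise's resolve callback so the recorder's onstop handler can signal completion.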
+ let resolver;
+ const promise = new Promise(r => {
+ resolver = r;
+ });
+ this.mediaRecorder.onstop = () => resolver();
+ this.mediaRecorder.stop();
+ if (this.inputStream) {
+ this.inputStream.getTracks().forEach(s => s.stop());
+ this.inputStream = null;
+ }
+ return promise.then(() => this.buildOutputBlob());
+ }
+ buildOutputBlob() {
+ return new Blob(this.recordedChunks, { type: BLOB_TYPE });
+ }
+}
\ No newline at end of file
diff --git a/tabs/report/report.py b/tabs/report/report.py
new file mode 100644
index 0000000000000000000000000000000000000000..0f13e14c487fe45e739bd92495fb96572af001c7
--- /dev/null
+++ b/tabs/report/report.py
@@ -0,0 +1,80 @@
+import os
+import sys
+import base64
+import pathlib
+import tempfile
+import gradio as gr
+
+from assets.i18n.i18n import I18nAuto
+
+now_dir = os.getcwd()
+sys.path.append(now_dir)
+
+i18n = I18nAuto()
+
+recorder_js_path = os.path.join(now_dir, "tabs", "report", "recorder.js")
+main_js_path = os.path.join(now_dir, "tabs", "report", "main.js")
+record_button_js_path = os.path.join(now_dir, "tabs", "report", "record_button.js")
+
+recorder_js = pathlib.Path(recorder_js_path).read_text()
+main_js = pathlib.Path(main_js_path).read_text()
+record_button_js = (
+ pathlib.Path(record_button_js_path)
+ .read_text()
+ .replace("let recorder_js = null;", recorder_js)
+ .replace("let main_js = null;", main_js)
+)
+
+
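+# Decode the base64 screen recording and write it to a temporary file. The recorder
+# produces a webm blob; the bytes are written unchanged, only the suffix says .mp4.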
+def save_base64_video(base64_string):
+ base64_video = base64_string
+ video_data = base64.b64decode(base64_video)
+ with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as temp_file:
+ temp_filename = temp_file.name
+ temp_file.write(video_data)
+ print(f"Temporary MP4 file saved as: {temp_filename}")
+ return temp_filename
+
+
+def report_tab():
+ instructions = [
+ i18n("# How to Report an Issue on GitHub"),
+ i18n(
+ "1. Click on the 'Record Screen' button below to start recording the issue you are experiencing."
+ ),
+ i18n(
+ "2. Once you have finished recording the issue, click on the 'Stop Recording' button (the same button, but the label changes depending on whether you are actively recording or not)."
+ ),
+ i18n(
+ "3. Go to [GitHub Issues](https://github.com/IAHispano/Applio/issues) and click on the 'New Issue' button."
+ ),
+ i18n(
+            "4. Complete the provided issue template, making sure to include details as needed, and use the assets section to upload the recorded file from the previous step."
+ ),
+ ]
+ components = [gr.Markdown(value=instruction) for instruction in instructions]
+
+ start_button = gr.Button("Record Screen")
+ video_component = gr.Video(interactive=False)
+
+ def toggle_button_label(returned_string):
+ if returned_string.startswith("Record"):
+ return gr.Button(value="Stop Recording"), None
+ else:
+ try:
+ temp_filename = save_base64_video(returned_string)
+ except Exception as error:
+ print(f"An error occurred converting video to mp4: {error}")
+ return gr.Button(value="Record Screen"), gr.Warning(
+ f"Failed to convert video to mp4:\n{error}"
+ )
+ return gr.Button(value="Record Screen"), gr.Video(
+ value=temp_filename, interactive=False
+ )
+
+ start_button.click(
+ fn=toggle_button_label,
+ inputs=[start_button],
+ outputs=[start_button, video_component],
+ js=record_button_js,
+ )
diff --git a/tabs/settings/fake_gpu.py b/tabs/settings/fake_gpu.py
new file mode 100644
index 0000000000000000000000000000000000000000..c958e4a4e673edd56b2a42635e5265e0e50eb617
--- /dev/null
+++ b/tabs/settings/fake_gpu.py
@@ -0,0 +1,55 @@
+import os, sys
+import torch
+import json
+import gradio as gr
+from assets.i18n.i18n import I18nAuto
+from tabs.settings.restart import restart_applio
+
+now_dir = os.getcwd()
+sys.path.append(now_dir)
+i18n = I18nAuto()
+
+ngpu = torch.cuda.device_count()
+config_file = os.path.join(now_dir, "assets", "config.json")
+
+
+def gpu_available():
+    return torch.cuda.is_available() or ngpu != 0
+
+
+def load_fake_gpu():
+ with open(config_file, "r", encoding="utf8") as file:
+ config = json.load(file)
+ return config["fake_gpu"]
+
+
+def save_config(value):
+ with open(config_file, "r", encoding="utf8") as file:
+ config = json.load(file)
+ config["fake_gpu"] = value
+ with open(config_file, "w", encoding="utf8") as file:
+ json.dump(config, file, indent=2)
+
+
+def fake_gpu_tab():
+ with gr.Row():
+ with gr.Column():
+ presence = gr.Checkbox(
+ label=i18n("Enable fake GPU"),
+ info=i18n(
+                    "Activates the train tab. Note that this device lacks GPU capabilities, so training is not supported; this option exists only for testing purposes. (Enabling it restarts Applio.)"
+ ),
+ interactive=True,
+ value=load_fake_gpu(),
+ )
+ presence.change(
+ fn=toggle,
+ inputs=[presence],
+ outputs=[],
+ )
+
+
+def toggle(checkbox):
+ save_config(bool(checkbox))
+ restart_applio()
diff --git a/tabs/settings/flask_server.py b/tabs/settings/flask_server.py
new file mode 100644
index 0000000000000000000000000000000000000000..40e3edfd0b85d0ede49985946c37f3c9be093329
--- /dev/null
+++ b/tabs/settings/flask_server.py
@@ -0,0 +1,43 @@
+import os
+import sys
+import gradio as gr
+from assets.i18n.i18n import I18nAuto
+import requests
+
+now_dir = os.getcwd()
+sys.path.append(now_dir)
+
+from assets.flask.server import start_flask, load_config_flask, save_config
+
+i18n = I18nAuto()
+
+
+def flask_server_tab():
+ with gr.Row():
+ with gr.Column():
+ flask_checkbox = gr.Checkbox(
+ label=i18n(
+ "Enable Applio integration with applio.org/models using flask"
+ ),
+ info=i18n(
+                    "It enables downloading models from the website with a single click."
+ ),
+ interactive=True,
+ value=load_config_flask(),
+ )
+ flask_checkbox.change(
+ fn=toggle,
+ inputs=[flask_checkbox],
+ outputs=[],
+ )
+
+
+def toggle(checkbox):
+ save_config(bool(checkbox))
+    if load_config_flask():
+ start_flask()
+ else:
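+        # Ask the Flask server (expected on http://localhost:8000) to shut down;
+        # ignore the connection error if it is not running.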
+ try:
+ requests.post("http://localhost:8000/shutdown")
+ except requests.exceptions.ConnectionError:
+ pass
diff --git a/tabs/settings/lang.py b/tabs/settings/lang.py
new file mode 100644
index 0000000000000000000000000000000000000000..3dc8d7b8b4f2423a709d53f2ef90ebe1577f08b4
--- /dev/null
+++ b/tabs/settings/lang.py
@@ -0,0 +1,57 @@
+import os, sys
+import json
+import gradio as gr
+from assets.i18n.i18n import I18nAuto
+
+now_dir = os.getcwd()
+sys.path.append(now_dir)
+
+i18n = I18nAuto()
+
+config_file = os.path.join(now_dir, "assets", "config.json")
+
+
+def get_language_settings():
+ with open(config_file, "r", encoding="utf8") as file:
+ config = json.load(file)
+
+    if not config["lang"]["override"]:
+ return "Language automatically detected in the system"
+ else:
+ return config["lang"]["selected_lang"]
+
+
+def save_lang_settings(selected_language):
+ with open(config_file, "r", encoding="utf8") as file:
+ config = json.load(file)
+
+ if selected_language == "Language automatically detected in the system":
+ config["lang"]["override"] = False
+ else:
+ config["lang"]["override"] = True
+ config["lang"]["selected_lang"] = selected_language
+
+    gr.Info("Language settings have been saved. Restart Applio to apply the changes.")
+
+ with open(config_file, "w", encoding="utf8") as file:
+ json.dump(config, file, indent=2)
+
+
+def lang_tab():
+ with gr.Column():
+ selected_language = gr.Dropdown(
+ label=i18n("Language"),
+ info=i18n(
+ "Select the language you want to use. (Requires restarting Applio)"
+ ),
+ value=get_language_settings(),
+ choices=["Language automatically detected in the system"]
+ + i18n._get_available_languages(),
+ interactive=True,
+ )
+
+ selected_language.change(
+ fn=save_lang_settings,
+ inputs=[selected_language],
+ outputs=[],
+ )
diff --git a/tabs/settings/precision.py b/tabs/settings/precision.py
new file mode 100644
index 0000000000000000000000000000000000000000..23e41d13395881277e76a77d64b82d64cd8b6dc5
--- /dev/null
+++ b/tabs/settings/precision.py
@@ -0,0 +1,41 @@
+import gradio as gr
+
+from rvc.configs.config import Config
+
+config = Config()
+
+from assets.i18n.i18n import I18nAuto
+
+i18n = I18nAuto()
+
+
+def precision_tab():
+ with gr.Row():
+ with gr.Column():
+
+ precision = gr.Radio(
+ label=i18n("Precision"),
+ info=i18n(
+ "Select the precision you want to use for training and inference."
+ ),
+ choices=[
+ "fp16",
+ "fp32",
+ ],
+ value=config.get_precision(),
+ interactive=True,
+ )
+ precision_output = gr.Textbox(
+ label=i18n("Output Information"),
+ info=i18n("The output information will be displayed here."),
+ value="",
+ max_lines=8,
+ interactive=False,
+ )
+
+ update_button = gr.Button(i18n("Update precision"))
+ update_button.click(
+ fn=config.set_precision,
+ inputs=[precision],
+ outputs=[precision_output],
+ )
diff --git a/tabs/settings/presence.py b/tabs/settings/presence.py
new file mode 100644
index 0000000000000000000000000000000000000000..a4dc2e6a0f6aad731194c30c1a79cdd46544579f
--- /dev/null
+++ b/tabs/settings/presence.py
@@ -0,0 +1,55 @@
+import os
+import sys
+import gradio as gr
+import json
+from assets.i18n.i18n import I18nAuto
+from assets.discord_presence import RPCManager
+
+now_dir = os.getcwd()
+sys.path.append(now_dir)
+
+i18n = I18nAuto()
+config_file = os.path.join(now_dir, "assets", "config.json")
+
+
+def load_config_presence():
+ with open(config_file, "r", encoding="utf8") as file:
+ config = json.load(file)
+ return config["discord_presence"]
+
+
+def save_config(value):
+ with open(config_file, "r", encoding="utf8") as file:
+ config = json.load(file)
+ config["discord_presence"] = value
+ with open(config_file, "w", encoding="utf8") as file:
+ json.dump(config, file, indent=2)
+
+
+def presence_tab():
+ with gr.Row():
+ with gr.Column():
+ presence = gr.Checkbox(
+ label=i18n("Enable Applio integration with Discord presence"),
+ info=i18n(
+                    "It enables displaying your current Applio activity in Discord."
+ ),
+ interactive=True,
+ value=load_config_presence(),
+ )
+ presence.change(
+ fn=toggle,
+ inputs=[presence],
+ outputs=[],
+ )
+
+
+def toggle(checkbox):
+ save_config(bool(checkbox))
+    if load_config_presence():
+ try:
+ RPCManager.start_presence()
+ except KeyboardInterrupt:
+ RPCManager.stop_presence()
+ else:
+ RPCManager.stop_presence()
diff --git a/tabs/settings/restart.py b/tabs/settings/restart.py
new file mode 100644
index 0000000000000000000000000000000000000000..50c3bce5850fe9dac6c11255f58cfb2f1f77951f
--- /dev/null
+++ b/tabs/settings/restart.py
@@ -0,0 +1,58 @@
+import gradio as gr
+import os
+import sys
+import json
+
+now_dir = os.getcwd()
+
+
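+# Read the PIDs recorded by a training run from its config.json, clear them, then
+# force-kill each process (signal 9).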
+def stop_train(model_name: str):
+ pid_file_path = os.path.join(now_dir, "logs", model_name, "config.json")
+ try:
+ with open(pid_file_path, "r") as pid_file:
+ pid_data = json.load(pid_file)
+ pids = pid_data.get("process_pids", [])
+ with open(pid_file_path, "w") as pid_file:
+ pid_data.pop("process_pids", None)
+ json.dump(pid_data, pid_file, indent=4)
+ for pid in pids:
+ os.kill(pid, 9)
+    except Exception:
+ pass
+
+
+def stop_infer():
+ pid_file_path = os.path.join(now_dir, "assets", "infer_pid.txt")
+ try:
+ with open(pid_file_path, "r") as pid_file:
+ pids = [int(pid) for pid in pid_file.readlines()]
+ for pid in pids:
+ os.kill(pid, 9)
+ os.remove(pid_file_path)
+    except Exception:
+ pass
+
+
+def restart_applio():
+ if os.name != "nt":
+ os.system("clear")
+ else:
+ os.system("cls")
+ python = sys.executable
+ os.execl(python, python, *sys.argv)
+
+
+from assets.i18n.i18n import I18nAuto
+
+i18n = I18nAuto()
+
+
+def restart_tab():
+ with gr.Row():
+ with gr.Column():
+ restart_button = gr.Button(i18n("Restart Applio"))
+ restart_button.click(
+ fn=restart_applio,
+ inputs=[],
+ outputs=[],
+ )
diff --git a/tabs/settings/themes.py b/tabs/settings/themes.py
new file mode 100644
index 0000000000000000000000000000000000000000..410041df8d3c4b441165c835bfbe241c1ebc57c8
--- /dev/null
+++ b/tabs/settings/themes.py
@@ -0,0 +1,30 @@
+import os
+import sys
+import gradio as gr
+
+from assets.i18n.i18n import I18nAuto
+import assets.themes.loadThemes as loadThemes
+
+now_dir = os.getcwd()
+sys.path.append(now_dir)
+
+i18n = I18nAuto()
+
+
+def theme_tab():
+ with gr.Row():
+ with gr.Column():
+ themes_select = gr.Dropdown(
+ loadThemes.get_list(),
+ value=loadThemes.read_json(),
+ label=i18n("Theme"),
+ info=i18n(
+ "Select the theme you want to use. (Requires restarting Applio)"
+ ),
+ visible=True,
+ )
+ themes_select.change(
+ fn=loadThemes.select_theme,
+ inputs=themes_select,
+ outputs=[],
+ )
diff --git a/tabs/settings/version.py b/tabs/settings/version.py
new file mode 100644
index 0000000000000000000000000000000000000000..6bd83884c11e878923627c56d61a58e4b4d8cecf
--- /dev/null
+++ b/tabs/settings/version.py
@@ -0,0 +1,24 @@
+import gradio as gr
+
+from assets.version_checker import compare_version
+from assets.i18n.i18n import I18nAuto
+
+i18n = I18nAuto()
+
+
+def version_tab():
+ with gr.Row():
+ with gr.Column():
+ version_check = gr.Textbox(
+ label=i18n("Version Checker"),
+ info=i18n(
+ "Check which version of Applio is the latest to see if you need to update."
+ ),
+ interactive=False,
+ )
+ version_button = gr.Button(i18n("Check for updates"))
+ version_button.click(
+ fn=compare_version,
+ inputs=[],
+ outputs=[version_check],
+ )
diff --git a/tabs/train/train.py b/tabs/train/train.py
new file mode 100644
index 0000000000000000000000000000000000000000..bd594b7d892d85528448e46adbf02757c10a4036
--- /dev/null
+++ b/tabs/train/train.py
@@ -0,0 +1,990 @@
+import os
+import shutil
+import sys
+from multiprocessing import cpu_count
+
+import gradio as gr
+
+from assets.i18n.i18n import I18nAuto
+from core import (
+ run_extract_script,
+ run_index_script,
+ run_preprocess_script,
+ run_prerequisites_script,
+ run_train_script,
+)
+from rvc.configs.config import get_gpu_info, get_number_of_gpus, max_vram_gpu
+from rvc.lib.utils import format_title
+from tabs.settings.restart import stop_train
+
+i18n = I18nAuto()
+now_dir = os.getcwd()
+sys.path.append(now_dir)
+
+pretraineds_v1 = [
+ (
+ "pretrained_v1/",
+ [
+ "D32k.pth",
+ "D40k.pth",
+ "D48k.pth",
+ "G32k.pth",
+ "G40k.pth",
+ "G48k.pth",
+ "f0D32k.pth",
+ "f0D40k.pth",
+ "f0D48k.pth",
+ "f0G32k.pth",
+ "f0G40k.pth",
+ "f0G48k.pth",
+ ],
+ ),
+]
+
+folder_mapping = {
+ "pretrained_v1/": "rvc/models/pretraineds/pretrained_v1/",
+}
+
+sup_audioext = {
+ "wav",
+ "mp3",
+ "flac",
+ "ogg",
+ "opus",
+ "m4a",
+ "mp4",
+ "aac",
+ "alac",
+ "wma",
+ "aiff",
+ "webm",
+ "ac3",
+}
+
+# Custom Pretraineds
+pretraineds_custom_path = os.path.join(
+ now_dir, "rvc", "models", "pretraineds", "pretraineds_custom"
+)
+
+pretraineds_custom_path_relative = os.path.relpath(pretraineds_custom_path, now_dir)
+
+custom_embedder_root = os.path.join(
+ now_dir, "rvc", "models", "embedders", "embedders_custom"
+)
+custom_embedder_root_relative = os.path.relpath(custom_embedder_root, now_dir)
+
+os.makedirs(custom_embedder_root, exist_ok=True)
+os.makedirs(pretraineds_custom_path_relative, exist_ok=True)
+
+
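+# Collect custom pretrained .pth files; the suffix ("G" or "D") selects generator
+# or discriminator checkpoints respectively.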
+def get_pretrained_list(suffix):
+ return [
+ os.path.join(dirpath, filename)
+ for dirpath, _, filenames in os.walk(pretraineds_custom_path_relative)
+ for filename in filenames
+ if filename.endswith(".pth") and suffix in filename
+ ]
+
+
+pretraineds_list_d = get_pretrained_list("D")
+pretraineds_list_g = get_pretrained_list("G")
+
+
+def refresh_custom_pretraineds():
+ return (
+ {"choices": sorted(get_pretrained_list("G")), "__type__": "update"},
+ {"choices": sorted(get_pretrained_list("D")), "__type__": "update"},
+ )
+
+
+# Dataset Creator
+datasets_path = os.path.join(now_dir, "assets", "datasets")
+
+if not os.path.exists(datasets_path):
+ os.makedirs(datasets_path)
+
+datasets_path_relative = os.path.relpath(datasets_path, now_dir)
+
+
+def get_datasets_list():
+ return [
+ dirpath
+ for dirpath, _, filenames in os.walk(datasets_path_relative)
+ if any(filename.endswith(tuple(sup_audioext)) for filename in filenames)
+ ]
+
+
+def refresh_datasets():
+ return {"choices": sorted(get_datasets_list()), "__type__": "update"}
+
+
+# Model Names
+models_path = os.path.join(now_dir, "logs")
+
+
+def get_models_list():
+ return [
+ os.path.basename(dirpath)
+ for dirpath in os.listdir(models_path)
+ if os.path.isdir(os.path.join(models_path, dirpath))
+ and all(excluded not in dirpath for excluded in ["zips", "mute"])
+ ]
+
+
+def refresh_models():
+ return {"choices": sorted(get_models_list()), "__type__": "update"}
+
+
+# Refresh Models and Datasets
+def refresh_models_and_datasets():
+ return (
+ {"choices": sorted(get_models_list()), "__type__": "update"},
+ {"choices": sorted(get_datasets_list()), "__type__": "update"},
+ )
+
+
+# Refresh Custom Embedders
+def get_embedder_custom_list():
+ return [
+ os.path.join(dirpath, dirname)
+ for dirpath, dirnames, _ in os.walk(custom_embedder_root_relative)
+ for dirname in dirnames
+ ]
+
+
+def refresh_custom_embedder_list():
+ return {"choices": sorted(get_embedder_custom_list()), "__type__": "update"}
+
+
+# Drop Model
+def save_drop_model(dropbox):
+    if not dropbox.endswith(".pth"):
+ gr.Info(
+ i18n(
+ "The file you dropped is not a valid pretrained file. Please try again."
+ )
+ )
+ else:
+ file_name = os.path.basename(dropbox)
+ pretrained_path = os.path.join(pretraineds_custom_path_relative, file_name)
+ if os.path.exists(pretrained_path):
+ os.remove(pretrained_path)
+ shutil.copy(dropbox, pretrained_path)
+ gr.Info(
+ i18n(
+ "Click the refresh button to see the pretrained file in the dropdown menu."
+ )
+ )
+ return None
+
+
+# Drop Dataset
+def save_drop_dataset_audio(dropbox, dataset_name):
+ if not dataset_name:
+        gr.Info("Please enter a valid dataset name and try again.")
+ return None, None
+ else:
+ file_extension = os.path.splitext(dropbox)[1][1:].lower()
+ if file_extension not in sup_audioext:
+ gr.Info("The file you dropped is not a valid audio file. Please try again.")
+ else:
+ dataset_name = format_title(dataset_name)
+ audio_file = format_title(os.path.basename(dropbox))
+ dataset_path = os.path.join(now_dir, "assets", "datasets", dataset_name)
+ if not os.path.exists(dataset_path):
+ os.makedirs(dataset_path)
+ destination_path = os.path.join(dataset_path, audio_file)
+ if os.path.exists(destination_path):
+ os.remove(destination_path)
+ shutil.copy(dropbox, destination_path)
+ gr.Info(
+ i18n(
+ "The audio file has been successfully added to the dataset. Please click the preprocess button."
+ )
+ )
+ dataset_path = os.path.dirname(destination_path)
+ relative_dataset_path = os.path.relpath(dataset_path, now_dir)
+
+ return None, relative_dataset_path
+
+
+# Drop Custom Embedder
+def create_folder_and_move_files(folder_name, bin_file, config_file):
+ if not folder_name:
+ return "Folder name must not be empty."
+
+ folder_name = os.path.join(custom_embedder_root, folder_name)
+ os.makedirs(folder_name, exist_ok=True)
+
+ if bin_file:
+ bin_file_path = os.path.join(folder_name, os.path.basename(bin_file))
+ shutil.copy(bin_file, bin_file_path)
+
+ if config_file:
+ config_file_path = os.path.join(folder_name, os.path.basename(config_file))
+ shutil.copy(config_file, config_file_path)
+
+ return f"Files moved to folder {folder_name}"
+
+
+def refresh_embedders_folders():
+ custom_embedders = [
+ os.path.join(dirpath, dirname)
+ for dirpath, dirnames, _ in os.walk(custom_embedder_root_relative)
+ for dirname in dirnames
+ ]
+ return custom_embedders
+
+
+# Export
+## Get Pth and Index Files
+def get_pth_list():
+ return [
+ os.path.relpath(os.path.join(dirpath, filename), now_dir)
+ for dirpath, _, filenames in os.walk(models_path)
+ for filename in filenames
+ if filename.endswith(".pth")
+ ]
+
+
+def get_index_list():
+ return [
+ os.path.relpath(os.path.join(dirpath, filename), now_dir)
+ for dirpath, _, filenames in os.walk(models_path)
+ for filename in filenames
+ if filename.endswith(".index") and "trained" not in filename
+ ]
+
+
+def refresh_pth_and_index_list():
+ return (
+ {"choices": sorted(get_pth_list()), "__type__": "update"},
+ {"choices": sorted(get_index_list()), "__type__": "update"},
+ )
+
+
+## Export Pth and Index Files
+def export_pth(pth_path):
+ if pth_path and os.path.exists(pth_path):
+ return pth_path
+ return None
+
+
+def export_index(index_path):
+ if index_path and os.path.exists(index_path):
+ return index_path
+ return None
+
+
+## Upload to Google Drive
+def upload_to_google_drive(pth_path, index_path):
+ def upload_file(file_path):
+ if file_path:
+ try:
+                gr.Info(f"Uploading {file_path} to Google Drive...")
+ google_drive_folder = "/content/drive/MyDrive/ApplioExported"
+ if not os.path.exists(google_drive_folder):
+ os.makedirs(google_drive_folder)
+ google_drive_file_path = os.path.join(
+ google_drive_folder, os.path.basename(file_path)
+ )
+ if os.path.exists(google_drive_file_path):
+ os.remove(google_drive_file_path)
+ shutil.copy2(file_path, google_drive_file_path)
+ gr.Info("File uploaded successfully.")
+ except Exception as error:
+ print(f"An error occurred uploading to Google Drive: {error}")
+ gr.Info("Error uploading to Google Drive")
+
+ upload_file(pth_path)
+ upload_file(index_path)
+
+
+# Train Tab
+def train_tab():
+ with gr.Row():
+ model_name = gr.Dropdown(
+ label=i18n("Model Name"),
+ info=i18n("Name of the new model."),
+ choices=get_models_list(),
+ value="my-project",
+ interactive=True,
+ allow_custom_value=True,
+ )
+ sampling_rate = gr.Radio(
+ label=i18n("Sampling Rate"),
+ info=i18n("The sampling rate of the audio files."),
+ choices=["32000", "40000", "48000"],
+ value="40000",
+ interactive=True,
+ )
+ rvc_version = gr.Radio(
+ label=i18n("Model Architecture"),
+ info=i18n("Version of the model architecture."),
+ choices=["v1", "v2"],
+ value="v2",
+ interactive=True,
+ )
+ with gr.Accordion(i18n("Preprocess")):
+ dataset_path = gr.Dropdown(
+ label=i18n("Dataset Path"),
+ info=i18n("Path to the dataset folder."),
+ # placeholder=i18n("Enter dataset path"),
+ choices=get_datasets_list(),
+ allow_custom_value=True,
+ interactive=True,
+ )
+ dataset_creator = gr.Checkbox(
+ label=i18n("Dataset Creator"),
+ value=False,
+ interactive=True,
+ visible=True,
+ )
+ with gr.Column(visible=False) as dataset_creator_settings:
+ with gr.Accordion(i18n("Dataset Creator")):
+ dataset_name = gr.Textbox(
+ label=i18n("Dataset Name"),
+ info=i18n("Name of the new dataset."),
+ placeholder=i18n("Enter dataset name"),
+ interactive=True,
+ )
+ upload_audio_dataset = gr.File(
+ label=i18n("Upload Audio Dataset"),
+ type="filepath",
+ interactive=True,
+ )
+ refresh = gr.Button(i18n("Refresh"))
+
+ with gr.Accordion(i18n("Advanced Settings"), open=False):
+ cpu_cores_preprocess = gr.Slider(
+ 1,
+ 64,
+ cpu_count(),
+ step=1,
+ label=i18n("CPU Cores"),
+ info=i18n(
+                    "The number of CPU cores to use for preprocessing. The default is all of your CPU cores, which is recommended for most cases."
+ ),
+ interactive=True,
+ )
+ with gr.Row():
+ cut_preprocess = gr.Checkbox(
+ label=i18n("Audio cutting"),
+ info=i18n(
+ "It's recommended to deactivate this option if your dataset has already been processed."
+ ),
+ value=True,
+ interactive=True,
+ visible=True,
+ )
+ process_effects = gr.Checkbox(
+ label=i18n("Process effects"),
+ info=i18n(
+ "It's recommended to deactivate this option if your dataset has already been processed."
+ ),
+ value=True,
+ interactive=True,
+ visible=True,
+ )
+ preprocess_output_info = gr.Textbox(
+ label=i18n("Output Information"),
+ info=i18n("The output information will be displayed here."),
+ value="",
+ max_lines=8,
+ interactive=False,
+ )
+
+ with gr.Row():
+ preprocess_button = gr.Button(i18n("Preprocess Dataset"))
+ preprocess_button.click(
+ fn=run_preprocess_script,
+ inputs=[
+ model_name,
+ dataset_path,
+ sampling_rate,
+ cpu_cores_preprocess,
+ cut_preprocess,
+ process_effects,
+ ],
+ outputs=[preprocess_output_info],
+ api_name="preprocess_dataset",
+ )
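+        # The api_name above also exposes this action over the HTTP API; a rough
+        # client-side sketch (assuming the app's default port) with gradio_client:
+        #   from gradio_client import Client
+        #   Client("http://127.0.0.1:6969").predict(..., api_name="/preprocess_dataset")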
+
+ with gr.Accordion(i18n("Extract")):
+ with gr.Row():
+ f0_method = gr.Radio(
+ label=i18n("Pitch extraction algorithm"),
+ info=i18n(
+ "Pitch extraction algorithm to use for the audio conversion. The default algorithm is rmvpe, which is recommended for most cases."
+ ),
+ choices=["crepe", "crepe-tiny", "rmvpe"],
+ value="rmvpe",
+ interactive=True,
+ )
+
+ embedder_model = gr.Radio(
+ label=i18n("Embedder Model"),
+ info=i18n("Model used for learning speaker embedding."),
+ choices=[
+ "contentvec",
+ "chinese-hubert-base",
+ "japanese-hubert-base",
+ "korean-hubert-base",
+ "custom",
+ ],
+ value="contentvec",
+ interactive=True,
+ )
+
+ hop_length = gr.Slider(
+ 1,
+ 512,
+ 128,
+ step=1,
+ label=i18n("Hop Length"),
+ info=i18n(
+ "Denotes the duration it takes for the system to transition to a significant pitch change. Smaller hop lengths require more time for inference but tend to yield higher pitch accuracy."
+ ),
+ visible=False,
+ interactive=True,
+ )
+ with gr.Row(visible=False) as embedder_custom:
+ with gr.Accordion("Custom Embedder", open=True):
+ with gr.Row():
+ embedder_model_custom = gr.Dropdown(
+ label="Select Custom Embedder",
+ choices=refresh_embedders_folders(),
+ interactive=True,
+ allow_custom_value=True,
+ )
+ refresh_embedders_button = gr.Button("Refresh embedders")
+ folder_name_input = gr.Textbox(label="Folder Name", interactive=True)
+ with gr.Row():
+ bin_file_upload = gr.File(
+ label="Upload .bin", type="filepath", interactive=True
+ )
+ config_file_upload = gr.File(
+ label="Upload .json", type="filepath", interactive=True
+ )
+ move_files_button = gr.Button("Move files to custom embedder folder")
+ pitch_guidance_extract = gr.Checkbox(
+ label=i18n("Pitch Guidance"),
+ info=i18n(
+ "By employing pitch guidance, it becomes feasible to mirror the intonation of the original voice, including its pitch. This feature is particularly valuable for singing and other scenarios where preserving the original melody or pitch pattern is essential."
+ ),
+ value=True,
+ interactive=True,
+ )
+
+ with gr.Accordion(
+ i18n(
+ "We prioritize running the model extraction on the GPU for faster performance. If you prefer to use the CPU, simply leave the GPU field blank."
+ ),
+ open=False,
+ ):
+ with gr.Row():
+ with gr.Column():
+ cpu_cores_extract = gr.Slider(
+ 1,
+ 64,
+ cpu_count(),
+ step=1,
+ label=i18n("CPU Cores"),
+ info=i18n(
+ "The number of CPU cores to use in the extraction process. The default setting are your cpu cores, which is recommended for most cases."
+ ),
+ interactive=True,
+ )
+
+ with gr.Column():
+ gpu_extract = gr.Textbox(
+ label=i18n("GPU Number"),
+ info=i18n(
+ "Specify the number of GPUs you wish to utilize for extracting by entering them separated by hyphens (-)."
+ ),
+ placeholder=i18n("0 to ∞ separated by -"),
+ value=str(get_number_of_gpus()),
+ interactive=True,
+ )
+ gr.Textbox(
+ label=i18n("GPU Information"),
+ info=i18n("The GPU information will be displayed here."),
+ value=get_gpu_info(),
+ interactive=False,
+ )
+
+ extract_output_info = gr.Textbox(
+ label=i18n("Output Information"),
+ info=i18n("The output information will be displayed here."),
+ value="",
+ max_lines=8,
+ interactive=False,
+ )
+ extract_button = gr.Button(i18n("Extract Features"))
+ extract_button.click(
+ fn=run_extract_script,
+ inputs=[
+ model_name,
+ rvc_version,
+ f0_method,
+ pitch_guidance_extract,
+ hop_length,
+ cpu_cores_extract,
+ gpu_extract,
+ sampling_rate,
+ embedder_model,
+ embedder_model_custom,
+ ],
+ outputs=[extract_output_info],
+ api_name="extract_features",
+ )
+
+ with gr.Accordion(i18n("Train")):
+ with gr.Row():
+ batch_size = gr.Slider(
+ 1,
+ 50,
+ max_vram_gpu(0),
+ step=1,
+ label=i18n("Batch Size"),
+ info=i18n(
+ "It's advisable to align it with the available VRAM of your GPU. A setting of 4 offers improved accuracy but slower processing, while 8 provides faster and standard results."
+ ),
+ interactive=True,
+ )
+ save_every_epoch = gr.Slider(
+ 1,
+ 100,
+ 10,
+ step=1,
+ label=i18n("Save Every Epoch"),
+ info=i18n("Determine at how many epochs the model will saved at."),
+ interactive=True,
+ )
+ total_epoch = gr.Slider(
+ 1,
+ 10000,
+ 500,
+ step=1,
+ label=i18n("Total Epoch"),
+ info=i18n(
+ "Specifies the overall quantity of epochs for the model training process."
+ ),
+ interactive=True,
+ )
+ with gr.Accordion(i18n("Advanced Settings"), open=False):
+ with gr.Row():
+ with gr.Column():
+ save_only_latest = gr.Checkbox(
+ label=i18n("Save Only Latest"),
+ info=i18n(
+ "Enabling this setting will result in the G and D files saving only their most recent versions, effectively conserving storage space."
+ ),
+ value=False,
+ interactive=True,
+ )
+ save_every_weights = gr.Checkbox(
+ label=i18n("Save Every Weights"),
+ info=i18n(
+ "This setting enables you to save the weights of the model at the conclusion of each epoch."
+ ),
+ value=True,
+ interactive=True,
+ )
+ pretrained = gr.Checkbox(
+ label=i18n("Pretrained"),
+ info=i18n(
+ "Utilize pretrained models when training your own. This approach reduces training duration and enhances overall quality."
+ ),
+ value=True,
+ interactive=True,
+ )
+ with gr.Column():
+ sync_graph = gr.Checkbox(
+ label=i18n("Sync Graph"),
+ info=i18n(
+ "Synchronize the graph of the tensorbaord. Only enable this setting if you are training a new model."
+ ),
+ value=False,
+ interactive=True,
+ )
+ cache_dataset_in_gpu = gr.Checkbox(
+ label=i18n("Cache Dataset in GPU"),
+ info=i18n(
+ "Cache the dataset in GPU memory to speed up the training process."
+ ),
+ value=False,
+ interactive=True,
+ )
+ pitch_guidance = gr.Checkbox(
+ label=i18n("Pitch Guidance"),
+ info=i18n(
+ "By employing pitch guidance, it becomes feasible to mirror the intonation of the original voice, including its pitch. This feature is particularly valuable for singing and other scenarios where preserving the original melody or pitch pattern is essential."
+ ),
+ value=True,
+ interactive=True,
+ )
+ with gr.Column():
+ custom_pretrained = gr.Checkbox(
+ label=i18n("Custom Pretrained"),
+ info=i18n(
+ "Utilizing custom pretrained models can lead to superior results, as selecting the most suitable pretrained models tailored to the specific use case can significantly enhance performance."
+ ),
+ value=False,
+ interactive=True,
+ )
+ with gr.Column(visible=False) as pretrained_custom_settings:
+ with gr.Accordion(i18n("Pretrained Custom Settings")):
+ upload_pretrained = gr.File(
+ label=i18n("Upload Pretrained Model"),
+ type="filepath",
+ interactive=True,
+ )
+                        refresh_custom_pretraineds_button = gr.Button(
+                            i18n("Refresh Custom Pretraineds")
+                        )
+ g_pretrained_path = gr.Dropdown(
+ label=i18n("Custom Pretrained G"),
+ info=i18n(
+ "Select the custom pretrained model for the generator."
+ ),
+ choices=sorted(pretraineds_list_g),
+ interactive=True,
+ allow_custom_value=True,
+ )
+ d_pretrained_path = gr.Dropdown(
+ label=i18n("Custom Pretrained D"),
+ info=i18n(
+ "Select the custom pretrained model for the discriminator."
+ ),
+ choices=sorted(pretraineds_list_d),
+ interactive=True,
+ allow_custom_value=True,
+ )
+ multiple_gpu = gr.Checkbox(
+ label=i18n("GPU Settings"),
+                info=i18n(
+                    "Sets advanced GPU settings, recommended for users with better GPU architecture."
+                ),
+ value=False,
+ interactive=True,
+ )
+ with gr.Column(visible=False) as gpu_custom_settings:
+ with gr.Accordion(i18n("GPU Settings")):
+ gpu = gr.Textbox(
+ label=i18n("GPU Number"),
+ info=i18n(
+ "Specify the number of GPUs you wish to utilize for training by entering them separated by hyphens (-)."
+ ),
+ placeholder=i18n("0 to ∞ separated by -"),
+ value=str(get_number_of_gpus()),
+ interactive=True,
+ )
+ gr.Textbox(
+ label=i18n("GPU Information"),
+ info=i18n("The GPU information will be displayed here."),
+ value=get_gpu_info(),
+ interactive=False,
+ )
+ overtraining_detector = gr.Checkbox(
+ label=i18n("Overtraining Detector"),
+ info=i18n(
+ "Detect overtraining to prevent the model from learning the training data too well and losing the ability to generalize to new data."
+ ),
+ value=False,
+ interactive=True,
+ )
+ with gr.Column(visible=False) as overtraining_settings:
+ with gr.Accordion(i18n("Overtraining Detector Settings")):
+ overtraining_threshold = gr.Slider(
+ 1,
+ 100,
+ 50,
+ step=1,
+ label=i18n("Overtraining Threshold"),
+ info=i18n(
+ "Set the maximum number of epochs you want your model to stop training if no improvement is detected."
+ ),
+ interactive=True,
+ )
+ index_algorithm = gr.Radio(
+ label=i18n("Index Algorithm"),
+ info=i18n(
+ "KMeans is a clustering algorithm that divides the dataset into K clusters. This setting is particularly useful for large datasets."
+ ),
+ choices=["Auto", "Faiss", "KMeans"],
+ value="Auto",
+ interactive=True,
+ )
+
+ with gr.Row():
+ train_output_info = gr.Textbox(
+ label=i18n("Output Information"),
+ info=i18n("The output information will be displayed here."),
+ value="",
+ max_lines=8,
+ interactive=False,
+ )
+
+ with gr.Row():
+ train_button = gr.Button(i18n("Start Training"))
+ train_button.click(
+ fn=run_train_script,
+ inputs=[
+ model_name,
+ rvc_version,
+ save_every_epoch,
+ save_only_latest,
+ save_every_weights,
+ total_epoch,
+ sampling_rate,
+ batch_size,
+ gpu,
+ pitch_guidance,
+ overtraining_detector,
+ overtraining_threshold,
+ pretrained,
+ sync_graph,
+ index_algorithm,
+ cache_dataset_in_gpu,
+ custom_pretrained,
+ g_pretrained_path,
+ d_pretrained_path,
+ ],
+ outputs=[train_output_info],
+ api_name="start_training",
+ )
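+        # Inputs are passed to run_train_script positionally, so this list must match its parameter order.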
+
+ stop_train_button = gr.Button(i18n("Stop Training"), visible=False)
+ stop_train_button.click(
+ fn=stop_train,
+ inputs=[model_name],
+ outputs=[],
+ )
+
+ index_button = gr.Button(i18n("Generate Index"))
+ index_button.click(
+ fn=run_index_script,
+ inputs=[model_name, rvc_version, index_algorithm],
+ outputs=[train_output_info],
+ api_name="generate_index",
+ )
+
+ with gr.Accordion(i18n("Export Model"), open=False):
+        if os.name != "nt":
+ gr.Markdown(
+ i18n(
+ "The button 'Upload' is only for google colab: Uploads the exported files to the ApplioExported folder in your Google Drive."
+ )
+ )
+ with gr.Row():
+ with gr.Column():
+ pth_file_export = gr.File(
+ label=i18n("Exported Pth file"),
+ type="filepath",
+ value=None,
+ interactive=False,
+ )
+ pth_dropdown_export = gr.Dropdown(
+ label=i18n("Pth file"),
+ info=i18n("Select the pth file to be exported"),
+ choices=get_pth_list(),
+ value=None,
+ interactive=True,
+ allow_custom_value=True,
+ )
+ with gr.Column():
+ index_file_export = gr.File(
+ label=i18n("Exported Index File"),
+ type="filepath",
+ value=None,
+ interactive=False,
+ )
+ index_dropdown_export = gr.Dropdown(
+ label=i18n("Index File"),
+ info=i18n("Select the index file to be exported"),
+ choices=get_index_list(),
+ value=None,
+ interactive=True,
+ allow_custom_value=True,
+ )
+ with gr.Row():
+ with gr.Column():
+ refresh_export = gr.Button(i18n("Refresh"))
+                if os.name != "nt":
+ upload_exported = gr.Button(i18n("Upload"), variant="primary")
+ upload_exported.click(
+ fn=upload_to_google_drive,
+ inputs=[pth_dropdown_export, index_dropdown_export],
+ outputs=[],
+ )
+
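+    # --- UI callbacks: each returns a Gradio update dict to tweak a component in place ---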
+ def toggle_visible(checkbox):
+ return {"visible": checkbox, "__type__": "update"}
+
+ def toggle_visible_hop_length(f0_method):
+        if f0_method in ("crepe", "crepe-tiny"):
+ return {"visible": True, "__type__": "update"}
+ return {"visible": False, "__type__": "update"}
+
+    def toggle_pretrained(pretrained, custom_pretrained):
+        # The custom-pretrained settings are only shown when both checkboxes are enabled.
+        return {"visible": pretrained, "__type__": "update"}, {
+            "visible": pretrained and custom_pretrained,
+            "__type__": "update",
+        }
+
+ def enable_stop_train_button():
+ return {"visible": False, "__type__": "update"}, {
+ "visible": True,
+ "__type__": "update",
+ }
+
+ def disable_stop_train_button():
+ return {"visible": True, "__type__": "update"}, {
+ "visible": False,
+ "__type__": "update",
+ }
+
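+    # Triggered by rvc_version.change below: fetches the v1 pretrained files on demand if any are missing.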
+    def download_prerequisites(version):
+        missing = any(
+            not os.path.exists(
+                os.path.join(folder_mapping.get(remote_folder, ""), file)
+            )
+            for remote_folder, file_list in pretraineds_v1
+            for file in file_list
+        )
+        if version == "v1" and missing:
+            gr.Info(
+                "Downloading prerequisites... Please wait till it finishes to start preprocessing."
+            )
+            run_prerequisites_script(True, False, True, True)
+            gr.Info(
+                "Prerequisites downloaded successfully, you may now start preprocessing."
+            )
+
+ def toggle_visible_embedder_custom(embedder_model):
+ if embedder_model == "custom":
+ return {"visible": True, "__type__": "update"}
+ return {"visible": False, "__type__": "update"}
+
+ rvc_version.change(
+ fn=download_prerequisites,
+ inputs=[rvc_version],
+ outputs=[],
+ )
+
+ refresh.click(
+ fn=refresh_models_and_datasets,
+ inputs=[],
+ outputs=[model_name, dataset_path],
+ )
+
+ dataset_creator.change(
+ fn=toggle_visible,
+ inputs=[dataset_creator],
+ outputs=[dataset_creator_settings],
+ )
+
+ upload_audio_dataset.upload(
+ fn=save_drop_dataset_audio,
+ inputs=[upload_audio_dataset, dataset_name],
+ outputs=[upload_audio_dataset, dataset_path],
+ )
+
+ f0_method.change(
+ fn=toggle_visible_hop_length,
+ inputs=[f0_method],
+ outputs=[hop_length],
+ )
+
+ embedder_model.change(
+ fn=toggle_visible_embedder_custom,
+ inputs=[embedder_model],
+ outputs=[embedder_custom],
+ )
+ move_files_button.click(
+ fn=create_folder_and_move_files,
+ inputs=[folder_name_input, bin_file_upload, config_file_upload],
+ outputs=[],
+ )
+ refresh_embedders_button.click(
+ fn=refresh_embedders_folders, inputs=[], outputs=[embedder_model_custom]
+ )
+ pretrained.change(
+ fn=toggle_pretrained,
+ inputs=[pretrained, custom_pretrained],
+ outputs=[custom_pretrained, pretrained_custom_settings],
+ )
+
+ custom_pretrained.change(
+ fn=toggle_visible,
+ inputs=[custom_pretrained],
+ outputs=[pretrained_custom_settings],
+ )
+
+    refresh_custom_pretraineds_button.click(
+ fn=refresh_custom_pretraineds,
+ inputs=[],
+ outputs=[g_pretrained_path, d_pretrained_path],
+ )
+
+ upload_pretrained.upload(
+ fn=save_drop_model,
+ inputs=[upload_pretrained],
+ outputs=[upload_pretrained],
+ )
+
+ overtraining_detector.change(
+ fn=toggle_visible,
+ inputs=[overtraining_detector],
+ outputs=[overtraining_settings],
+ )
+
+ multiple_gpu.change(
+ fn=toggle_visible,
+ inputs=[multiple_gpu],
+ outputs=[gpu_custom_settings],
+ )
+
+ train_button.click(
+ fn=enable_stop_train_button,
+ inputs=[],
+ outputs=[train_button, stop_train_button],
+ )
+
+ train_output_info.change(
+ fn=disable_stop_train_button,
+ inputs=[],
+ outputs=[train_button, stop_train_button],
+ )
+
+ pth_dropdown_export.change(
+ fn=export_pth,
+ inputs=[pth_dropdown_export],
+ outputs=[pth_file_export],
+ )
+
+ index_dropdown_export.change(
+ fn=export_index,
+ inputs=[index_dropdown_export],
+ outputs=[index_file_export],
+ )
+
+ refresh_export.click(
+ fn=refresh_pth_and_index_list,
+ inputs=[],
+ outputs=[pth_dropdown_export, index_dropdown_export],
+ )
diff --git a/tabs/tts/tts.py b/tabs/tts/tts.py
new file mode 100644
index 0000000000000000000000000000000000000000..e1292513c8623736c2c3d3324695f6acfb3631dc
--- /dev/null
+++ b/tabs/tts/tts.py
@@ -0,0 +1,475 @@
+import os
+import sys
+import gradio as gr
+import regex as re
+import json
+import random
+import shutil
+
+from core import (
+ run_tts_script,
+)
+
+from assets.i18n.i18n import I18nAuto
+
+i18n = I18nAuto()
+
+now_dir = os.getcwd()
+sys.path.append(now_dir)
+
+model_root = os.path.join(now_dir, "logs")
+model_root_relative = os.path.relpath(model_root, now_dir)
+custom_embedder_root = os.path.join(
+ now_dir, "rvc", "models", "embedders", "embedders_custom"
+)
+
+os.makedirs(custom_embedder_root, exist_ok=True)
+
+custom_embedder_root_relative = os.path.relpath(custom_embedder_root, now_dir)
+
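+# Scan logs/ once at import time for voice models; G_/D_ checkpoints are training artifacts and are skipped.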
+names = [
+ os.path.join(root, file)
+ for root, _, files in os.walk(model_root_relative, topdown=False)
+ for file in files
+ if (
+ file.endswith((".pth", ".onnx"))
+ and not (file.startswith("G_") or file.startswith("D_"))
+ )
+]
+
+indexes_list = [
+ os.path.join(root, name)
+ for root, _, files in os.walk(model_root_relative, topdown=False)
+ for name in files
+ if name.endswith(".index") and "trained" not in name
+]
+
+custom_embedders = [
+ os.path.join(dirpath, filename)
+ for dirpath, _, filenames in os.walk(custom_embedder_root_relative)
+ for filename in filenames
+ if filename.endswith(".pt")
+]
+
+
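+# Re-scans the model, index, and custom-embedder folders and returns one Gradio update dict per dropdown.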
+def change_choices():
+ names = [
+ os.path.join(root, file)
+ for root, _, files in os.walk(model_root_relative, topdown=False)
+ for file in files
+ if (
+ file.endswith((".pth", ".onnx"))
+ and not (file.startswith("G_") or file.startswith("D_"))
+ )
+ ]
+
+ indexes_list = [
+ os.path.join(root, name)
+ for root, _, files in os.walk(model_root_relative, topdown=False)
+ for name in files
+ if name.endswith(".index") and "trained" not in name
+ ]
+
+ custom_embedders = [
+ os.path.join(dirpath, filename)
+ for dirpath, _, filenames in os.walk(custom_embedder_root_relative)
+ for filename in filenames
+ if filename.endswith(".pt")
+ ]
+    return (
+        {"choices": sorted(names), "__type__": "update"},
+        {"choices": sorted(indexes_list), "__type__": "update"},
+        {"choices": sorted(custom_embedders), "__type__": "update"},
+    )
+
+
+def get_indexes():
+ indexes_list = [
+ os.path.join(dirpath, filename)
+ for dirpath, _, filenames in os.walk(model_root_relative)
+ for filename in filenames
+ if filename.endswith(".index") and "trained" not in filename
+ ]
+
+    return indexes_list
+
+
+def process_input(file_path):
+ with open(file_path, "r") as file:
+ file_contents = file.read()
+ gr.Info(f"The text from the txt file has been loaded!")
+ return file_contents, None
+
+
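+# Heuristic lookup: returns the first .index file that shares the model's folder or matches its name prefix.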
+def match_index(model_file_value):
+ if model_file_value:
+ model_folder = os.path.dirname(model_file_value)
+ model_name = os.path.basename(model_file_value)
+ index_files = get_indexes()
+ pattern = r"^(.*?)_"
+ match = re.match(pattern, model_name)
+ for index_file in index_files:
+ if os.path.dirname(index_file) == model_folder:
+ return index_file
+ elif match and match.group(1) in os.path.basename(index_file):
+ return index_file
+ elif model_name in os.path.basename(index_file):
+ return index_file
+ return ""
+
+
+def save_drop_custom_embedder(dropbox):
+ if ".pt" not in dropbox:
+ gr.Info(
+ i18n("The file you dropped is not a valid embedder file. Please try again.")
+ )
+ else:
+ file_name = os.path.basename(dropbox)
+ custom_embedder_path = os.path.join(custom_embedder_root, file_name)
+ if os.path.exists(custom_embedder_path):
+ os.remove(custom_embedder_path)
+ shutil.copy(dropbox, custom_embedder_path)
+ gr.Info(
+ i18n(
+ "Click the refresh button to see the embedder file in the dropdown menu."
+ )
+ )
+ return None
+
+
+# TTS tab
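+# Flow: synthesize speech from the text to output_tts_path, then run RVC conversion to output_rvc_path.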
+def tts_tab():
+ default_weight = random.choice(names) if names else ""
+ with gr.Row():
+ with gr.Row():
+ model_file = gr.Dropdown(
+ label=i18n("Voice Model"),
+ info=i18n("Select the voice model to use for the conversion."),
+ choices=sorted(names, key=lambda path: os.path.getsize(path)),
+ interactive=True,
+ value=default_weight,
+ allow_custom_value=True,
+ )
+ best_default_index_path = match_index(model_file.value)
+ index_file = gr.Dropdown(
+ label=i18n("Index File"),
+ info=i18n("Select the index file to use for the conversion."),
+ choices=get_indexes(),
+ value=best_default_index_path,
+ interactive=True,
+ allow_custom_value=True,
+ )
+ with gr.Column():
+ refresh_button = gr.Button(i18n("Refresh"))
+ unload_button = gr.Button(i18n("Unload Voice"))
+
+ unload_button.click(
+ fn=lambda: (
+ {"value": "", "__type__": "update"},
+ {"value": "", "__type__": "update"},
+ ),
+ inputs=[],
+ outputs=[model_file, index_file],
+ )
+
+ model_file.select(
+ fn=lambda model_file_value: match_index(model_file_value),
+ inputs=[model_file],
+ outputs=[index_file],
+ )
+
+ json_path = os.path.join("rvc", "lib", "tools", "tts_voices.json")
+ with open(json_path, "r") as file:
+ tts_voices_data = json.load(file)
+
+ short_names = [voice.get("ShortName", "") for voice in tts_voices_data]
+
+ tts_voice = gr.Dropdown(
+ label=i18n("TTS Voices"),
+ info=i18n("Select the TTS voice to use for the conversion."),
+ choices=short_names,
+ interactive=True,
+ value=None,
+ )
+
+ tts_rate = gr.Slider(
+ minimum=-100,
+ maximum=100,
+ step=1,
+ label=i18n("TTS Speed"),
+ info=i18n("Increase or decrease TTS speed"),
+ value=0,
+ interactive=True,
+ )
+
+ tts_text = gr.Textbox(
+ label=i18n("Text to Synthesize"),
+ info=i18n("Enter the text to synthesize."),
+ placeholder=i18n("Enter text to synthesize"),
+ lines=3,
+ )
+
+ txt_file = gr.File(
+ label=i18n("Or you can upload a .txt file"),
+ type="filepath",
+ )
+
+ with gr.Accordion(i18n("Advanced Settings"), open=False):
+ with gr.Column():
+ output_tts_path = gr.Textbox(
+ label=i18n("Output Path for TTS Audio"),
+ placeholder=i18n("Enter output path"),
+ value=os.path.join(now_dir, "assets", "audios", "tts_output.wav"),
+ interactive=True,
+ )
+ output_rvc_path = gr.Textbox(
+ label=i18n("Output Path for RVC Audio"),
+ placeholder=i18n("Enter output path"),
+ value=os.path.join(now_dir, "assets", "audios", "tts_rvc_output.wav"),
+ interactive=True,
+ )
+ export_format = gr.Radio(
+ label=i18n("Export Format"),
+ info=i18n("Select the format to export the audio."),
+ choices=["WAV", "MP3", "FLAC", "OGG", "M4A"],
+ value="WAV",
+ interactive=True,
+ )
+ split_audio = gr.Checkbox(
+ label=i18n("Split Audio"),
+ info=i18n(
+ "Split the audio into chunks for inference to obtain better results in some cases."
+ ),
+ visible=True,
+ value=False,
+ interactive=True,
+ )
+ autotune = gr.Checkbox(
+ label=i18n("Autotune"),
+ info=i18n(
+ "Apply a soft autotune to your inferences, recommended for singing conversions."
+ ),
+ visible=True,
+ value=False,
+ interactive=True,
+ )
+ clean_audio = gr.Checkbox(
+ label=i18n("Clean Audio"),
+ info=i18n(
+ "Clean your audio output using noise detection algorithms, recommended for speaking audios."
+ ),
+ visible=True,
+ value=True,
+ interactive=True,
+ )
+ clean_strength = gr.Slider(
+ minimum=0,
+ maximum=1,
+ label=i18n("Clean Strength"),
+ info=i18n(
+ "Set the clean-up level to the audio you want, the more you increase it the more it will clean up, but it is possible that the audio will be more compressed."
+ ),
+ visible=True,
+ value=0.5,
+ interactive=True,
+ )
+ upscale_audio = gr.Checkbox(
+ label=i18n("Upscale Audio"),
+ info=i18n(
+ "Upscale the audio to a higher quality, recommended for low-quality audios. (It could take longer to process the audio)"
+ ),
+ visible=True,
+ value=False,
+ interactive=True,
+ )
+ pitch = gr.Slider(
+ minimum=-24,
+ maximum=24,
+ step=1,
+ label=i18n("Pitch"),
+ info=i18n(
+ "Set the pitch of the audio, the higher the value, the higher the pitch."
+ ),
+ value=0,
+ interactive=True,
+ )
+ filter_radius = gr.Slider(
+ minimum=0,
+ maximum=7,
+ label=i18n("Filter Radius"),
+ info=i18n(
+ "If the number is greater than or equal to three, employing median filtering on the collected tone results has the potential to decrease respiration."
+ ),
+ value=3,
+ step=1,
+ interactive=True,
+ )
+ index_rate = gr.Slider(
+ minimum=0,
+ maximum=1,
+ label=i18n("Search Feature Ratio"),
+ info=i18n(
+ "Influence exerted by the index file; a higher value corresponds to greater influence. However, opting for lower values can help mitigate artifacts present in the audio."
+ ),
+ value=0.75,
+ interactive=True,
+ )
+ rms_mix_rate = gr.Slider(
+ minimum=0,
+ maximum=1,
+ label=i18n("Volume Envelope"),
+ info=i18n(
+ "Substitute or blend with the volume envelope of the output. The closer the ratio is to 1, the more the output envelope is employed."
+ ),
+ value=1,
+ interactive=True,
+ )
+ protect = gr.Slider(
+ minimum=0,
+ maximum=0.5,
+ label=i18n("Protect Voiceless Consonants"),
+ info=i18n(
+ "Safeguard distinct consonants and breathing sounds to prevent electro-acoustic tearing and other artifacts. Pulling the parameter to its maximum value of 0.5 offers comprehensive protection. However, reducing this value might decrease the extent of protection while potentially mitigating the indexing effect."
+ ),
+ value=0.5,
+ interactive=True,
+ )
+ hop_length = gr.Slider(
+ minimum=1,
+ maximum=512,
+ step=1,
+ label=i18n("Hop Length"),
+ info=i18n(
+ "Denotes the duration it takes for the system to transition to a significant pitch change. Smaller hop lengths require more time for inference but tend to yield higher pitch accuracy."
+ ),
+ value=128,
+ interactive=True,
+ )
+ f0_method = gr.Radio(
+ label=i18n("Pitch extraction algorithm"),
+ info=i18n(
+ "Pitch extraction algorithm to use for the audio conversion. The default algorithm is rmvpe, which is recommended for most cases."
+ ),
+ choices=[
+ "crepe",
+ "crepe-tiny",
+ "rmvpe",
+ "fcpe",
+ "hybrid[rmvpe+fcpe]",
+ ],
+ value="rmvpe",
+ interactive=True,
+ )
+ embedder_model = gr.Radio(
+ label=i18n("Embedder Model"),
+ info=i18n("Model used for learning speaker embedding."),
+ choices=[
+ "contentvec",
+ "chinese-hubert-base",
+ "japanese-hubert-base",
+ "korean-hubert-base",
+ "custom",
+ ],
+ value="contentvec",
+ interactive=True,
+ )
+ with gr.Column(visible=False) as embedder_custom:
+ with gr.Accordion(i18n("Custom Embedder"), open=True):
+ embedder_upload_custom = gr.File(
+ label=i18n("Upload Custom Embedder"),
+ type="filepath",
+ interactive=True,
+ )
+ embedder_custom_refresh = gr.Button(i18n("Refresh"))
+ embedder_model_custom = gr.Dropdown(
+ label=i18n("Custom Embedder"),
+ info=i18n(
+ "Select the custom embedder to use for the conversion."
+ ),
+ choices=sorted(custom_embedders),
+ interactive=True,
+ allow_custom_value=True,
+ )
+ f0_file = gr.File(
+ label=i18n(
+ "The f0 curve represents the variations in the base frequency of a voice over time, showing how pitch rises and falls."
+ ),
+ visible=True,
+ )
+
+ convert_button1 = gr.Button(i18n("Convert"))
+
+ with gr.Row():
+ vc_output1 = gr.Textbox(
+ label=i18n("Output Information"),
+ info=i18n("The output information will be displayed here."),
+ )
+ vc_output2 = gr.Audio(label=i18n("Export Audio"))
+
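+    # UI callbacks: return Gradio update dicts to toggle component visibility.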
+ def toggle_visible(checkbox):
+ return {"visible": checkbox, "__type__": "update"}
+
+ def toggle_visible_embedder_custom(embedder_model):
+ if embedder_model == "custom":
+ return {"visible": True, "__type__": "update"}
+ return {"visible": False, "__type__": "update"}
+
+ clean_audio.change(
+ fn=toggle_visible,
+ inputs=[clean_audio],
+ outputs=[clean_strength],
+ )
+    refresh_button.click(
+        fn=change_choices,
+        inputs=[],
+        outputs=[model_file, index_file, embedder_model_custom],
+    )
+ txt_file.upload(
+ fn=process_input,
+ inputs=[txt_file],
+ outputs=[tts_text, txt_file],
+ )
+ embedder_model.change(
+ fn=toggle_visible_embedder_custom,
+ inputs=[embedder_model],
+ outputs=[embedder_custom],
+ )
+ embedder_upload_custom.upload(
+ fn=save_drop_custom_embedder,
+ inputs=[embedder_upload_custom],
+ outputs=[embedder_upload_custom],
+ )
+ embedder_custom_refresh.click(
+ fn=change_choices,
+ inputs=[],
+ outputs=[model_file, index_file, embedder_model_custom],
+ )
+ convert_button1.click(
+ fn=run_tts_script,
+ inputs=[
+ tts_text,
+ tts_voice,
+ tts_rate,
+ pitch,
+ filter_radius,
+ index_rate,
+ rms_mix_rate,
+ protect,
+ hop_length,
+ f0_method,
+ output_tts_path,
+ output_rvc_path,
+ model_file,
+ index_file,
+ split_audio,
+ autotune,
+ clean_audio,
+ clean_strength,
+ export_format,
+ upscale_audio,
+ f0_file,
+ embedder_model,
+ embedder_model_custom,
+ ],
+ outputs=[vc_output1, vc_output2],
+ )
diff --git a/tabs/voice_blender/voice_blender.py b/tabs/voice_blender/voice_blender.py
new file mode 100644
index 0000000000000000000000000000000000000000..8b740d2a68331d3228139f1d8b19ed25d7a9abd4
--- /dev/null
+++ b/tabs/voice_blender/voice_blender.py
@@ -0,0 +1,99 @@
+import os
+import sys
+import gradio as gr
+import shutil
+
+now_dir = os.getcwd()
+sys.path.append(now_dir)
+
+from assets.i18n.i18n import I18nAuto
+from core import run_model_blender_script
+
+i18n = I18nAuto()
+
+
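+# Mirrors the dropped file's path into the textbox and clears the drop zone.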
+def update_model_fusion(dropbox):
+ return dropbox, None
+
+
+def voice_blender_tab():
+ gr.Markdown(i18n("## Voice Blender"))
+ gr.Markdown(
+ i18n(
+ "Select two voice models, set your desired blend percentage, and blend them into an entirely new voice."
+ )
+ )
+ with gr.Column():
+ model_fusion_name = gr.Textbox(
+ label=i18n("Model Name"),
+ info=i18n("Name of the new model."),
+ value="",
+ max_lines=1,
+ interactive=True,
+ placeholder=i18n("Enter model name"),
+ )
+ with gr.Row():
+ with gr.Column():
+ model_fusion_a_dropbox = gr.File(
+ label=i18n("Drag and drop your model here"), type="filepath"
+ )
+ model_fusion_a = gr.Textbox(
+ label=i18n("Path to Model"),
+ value="",
+ interactive=True,
+ placeholder=i18n("Enter path to model"),
+ info=i18n("You can also use a custom path."),
+ )
+ with gr.Column():
+ model_fusion_b_dropbox = gr.File(
+ label=i18n("Drag and drop your model here"), type="filepath"
+ )
+ model_fusion_b = gr.Textbox(
+ label=i18n("Path to Model"),
+ value="",
+ interactive=True,
+ placeholder=i18n("Enter path to model"),
+ info=i18n("You can also use a custom path."),
+ )
+ alpha_a = gr.Slider(
+ minimum=0,
+ maximum=1,
+ label=i18n("Blend Ratio"),
+ value=0.5,
+ interactive=True,
+ info=i18n(
+ "Adjusting the position more towards one side or the other will make the model more similar to the first or second."
+ ),
+ )
+ model_fusion_button = gr.Button(i18n("Fusion"), variant="primary")
+ with gr.Row():
+ model_fusion_output_info = gr.Textbox(
+ label=i18n("Output Information"),
+ info=i18n("The output information will be displayed here."),
+ value="",
+ )
+ model_fusion_pth_output = gr.File(
+ label=i18n("Download Model"), type="filepath", interactive=False
+ )
+
+ model_fusion_button.click(
+ fn=run_model_blender_script,
+ inputs=[
+ model_fusion_name,
+ model_fusion_a,
+ model_fusion_b,
+ alpha_a,
+ ],
+ outputs=[model_fusion_output_info, model_fusion_pth_output],
+ )
+
+ model_fusion_a_dropbox.upload(
+ fn=update_model_fusion,
+ inputs=model_fusion_a_dropbox,
+ outputs=[model_fusion_a, model_fusion_a_dropbox],
+ )
+
+ model_fusion_b_dropbox.upload(
+ fn=update_model_fusion,
+ inputs=model_fusion_b_dropbox,
+ outputs=[model_fusion_b, model_fusion_b_dropbox],
+ )