diff --git a/.gitattributes b/.gitattributes index a6344aac8c09253b3b630fb776ae94478aa0275b..dfe0770424b2a19faf507a501ebfc23be8f54e7b 100644 --- a/.gitattributes +++ b/.gitattributes @@ -1,35 +1,2 @@ -*.7z filter=lfs diff=lfs merge=lfs -text -*.arrow filter=lfs diff=lfs merge=lfs -text -*.bin filter=lfs diff=lfs merge=lfs -text -*.bz2 filter=lfs diff=lfs merge=lfs -text -*.ckpt filter=lfs diff=lfs merge=lfs -text -*.ftz filter=lfs diff=lfs merge=lfs -text -*.gz filter=lfs diff=lfs merge=lfs -text -*.h5 filter=lfs diff=lfs merge=lfs -text -*.joblib filter=lfs diff=lfs merge=lfs -text -*.lfs.* filter=lfs diff=lfs merge=lfs -text -*.mlmodel filter=lfs diff=lfs merge=lfs -text -*.model filter=lfs diff=lfs merge=lfs -text -*.msgpack filter=lfs diff=lfs merge=lfs -text -*.npy filter=lfs diff=lfs merge=lfs -text -*.npz filter=lfs diff=lfs merge=lfs -text -*.onnx filter=lfs diff=lfs merge=lfs -text -*.ot filter=lfs diff=lfs merge=lfs -text -*.parquet filter=lfs diff=lfs merge=lfs -text -*.pb filter=lfs diff=lfs merge=lfs -text -*.pickle filter=lfs diff=lfs merge=lfs -text -*.pkl filter=lfs diff=lfs merge=lfs -text -*.pt filter=lfs diff=lfs merge=lfs -text -*.pth filter=lfs diff=lfs merge=lfs -text -*.rar filter=lfs diff=lfs merge=lfs -text -*.safetensors filter=lfs diff=lfs merge=lfs -text -saved_model/**/* filter=lfs diff=lfs merge=lfs -text -*.tar.* filter=lfs diff=lfs merge=lfs -text -*.tar filter=lfs diff=lfs merge=lfs -text -*.tflite filter=lfs diff=lfs merge=lfs -text -*.tgz filter=lfs diff=lfs merge=lfs -text -*.wasm filter=lfs diff=lfs merge=lfs -text -*.xz filter=lfs diff=lfs merge=lfs -text -*.zip filter=lfs diff=lfs merge=lfs -text -*.zst filter=lfs diff=lfs merge=lfs -text -*tfevents* filter=lfs diff=lfs merge=lfs -text +# Auto detect text files and perform LF normalization +* text=auto diff --git a/.github/FUNDING.yml b/.github/FUNDING.yml deleted file mode 100644 index ed12c60a8b10c2b682985843bbe58333def3a36d..0000000000000000000000000000000000000000 --- 
a/.github/FUNDING.yml +++ /dev/null @@ -1,13 +0,0 @@ -# These are supported funding model platforms - -github: # -patreon: # Replace with a single Patreon username -open_collective: # Replace with a single Open Collective username -ko_fi: iahispano -tidelift: # Replace with a single Tidelift platform-name/package-name e.g., npm/babel -community_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry -liberapay: # Replace with a single Liberapay username -issuehunt: # Replace with a single IssueHunt username -otechie: # Replace with a single Otechie username -lfx_crowdfunding: # Replace with a single LFX Crowdfunding project-name e.g., cloud-foundry -custom: # Replace with up to 4 custom sponsorship URLs e.g., ['link1', 'link2'] diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md deleted file mode 100644 index 98e9406374652b0a6308b4dc92c829198a6c2ede..0000000000000000000000000000000000000000 --- a/.github/ISSUE_TEMPLATE/bug_report.md +++ /dev/null @@ -1,34 +0,0 @@ ---- -name: Bug report -about: Create a report to help us improve -title: "[BUG]" -labels: '' -assignees: '' - ---- - -**Before You Report a Bug** -Reporting a bug is essential for us to improve our service, but we need detailed information to address the issue effectively. Since every computer setup is unique, there can be various reasons behind a bug. Before reporting, consider potential causes and provide as much detail as possible to help us understand the problem. - -**Bug Description** -Please provide a clear and concise description of the bug. - -**Steps to Reproduce** -Outline the steps to replicate the issue: -1. Go to '...' -2. Click on '....' -3. Scroll down to '....' -4. Observe the error. - -**Expected Behavior** -Describe what you expected to happen. - -**Assets** -Include screenshots or videos if they can illustrate the issue. 
- -**Desktop Details:** -- Operating System: [e.g., Windows 11] -- Browser: [e.g., Chrome, Safari] - -**Additional Context** -Any additional information that might be relevant to the issue. diff --git a/.github/workflows/code_formatter.yml b/.github/workflows/code_formatter.yml deleted file mode 100644 index 491987af4756ee9e815d8729e2b0c743d8858493..0000000000000000000000000000000000000000 --- a/.github/workflows/code_formatter.yml +++ /dev/null @@ -1,51 +0,0 @@ -name: Code Formatter - -on: - push: - branches: - - main - -jobs: - push_format: - runs-on: ubuntu-latest - - permissions: - contents: write - pull-requests: write - - steps: - - uses: actions/checkout@v4 - with: - ref: ${{github.ref_name}} - - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v5 - with: - python-version: ${{ matrix.python-version }} - - - name: Install Black - run: pip install "black[jupyter]" - - - name: Run Black - # run: black $(git ls-files '*.py') - run: black . --exclude=".*\.ipynb$" - - - name: Commit Back - continue-on-error: true - id: commitback - run: | - git config --local user.email "github-actions[bot]@users.noreply.github.com" - git config --local user.name "github-actions[bot]" - git add --all - git commit -m "chore(format): run black on ${{github.ref_name}}" - - - name: Create Pull Request - if: steps.commitback.outcome == 'success' - continue-on-error: true - uses: peter-evans/create-pull-request@v5 - with: - delete-branch: true - body: "Automatically apply code formatter change" - title: "chore(format): run black on ${{github.ref_name}}" - commit-message: "chore(format): run black on ${{github.ref_name}}" - branch: formatter-${{github.ref_name}} diff --git a/.github/workflows/unittest.yml b/.github/workflows/unittest.yml deleted file mode 100644 index c981e9e3fccb1dea77bdb4329a0a3b2df5aa040b..0000000000000000000000000000000000000000 --- a/.github/workflows/unittest.yml +++ /dev/null @@ -1,32 +0,0 @@ -name: Test preprocess and extract -on: 
[push, pull_request] -jobs: - build: - runs-on: ${{ matrix.os }} - strategy: - matrix: - python-version: ["3.9", "3.10"] - os: [ubuntu-latest] - - steps: - - uses: actions/checkout@main - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v5 - with: - python-version: ${{ matrix.python-version }} - - name: Install dependencies - run: | - sudo apt update - sudo apt -y install ffmpeg - python -m pip install --upgrade pip - python -m pip install --upgrade setuptools - python -m pip install --upgrade wheel - pip install torch torchvision torchaudio - pip install -r requirements.txt - python core.py prerequisites --models "True" - - name: Test Preprocess - run: | - python core.py preprocess --model_name "Evaluate" --dataset_path "logs/mute/sliced_audios" --sampling_rate "48000" --cpu_cores "2" - - name: Test Extract - run: | - python core.py extract --model_name "Evaluate" --sampling_rate "48000" --cpu_cores "2" \ No newline at end of file diff --git a/.gitignore b/.gitignore index c95114609de3d775e7202b6a3727cce220bc5349..33152a49f590cf6a6b0ce06ac5dae9c19ebdb6cb 100644 --- a/.gitignore +++ b/.gitignore @@ -2,9 +2,20 @@ *.pt *.onnx *.pyc +*.pth +*.index *.mp3 *.flac *.ogg *.m4a +*.bin +*.wav +*.txt *.zip *.png +*.safetensors + +logs +env +venv +.venv \ No newline at end of file diff --git a/README.md b/README.md index 2a850a053a668509849a6cca93c4449516e6df72..cad4ba080feb587ce9b0d42cadd4f7be62606e5d 100644 --- a/README.md +++ b/README.md @@ -1,10 +1,81 @@ ---- -license: mit -title: Applio Full ZeroGPU -sdk: gradio -emoji: 🗣️ -colorFrom: blue -colorTo: blue -sdk_version: 4.40.0 -app_file: app.py ---- \ No newline at end of file +

+ Applio +

+ +

+ Contributors + Release + Stars + Fork + Issues +

+ +

A simple, high-quality voice conversion tool, focused on ease of use and performance.

+ +

+ 🌐 Website + • + 📚 Documentation + • + ☎️ Discord +

+ +

+ 🛒 Plugins + • + 📦 Compiled + • + 🎮 Playground + • + 🔎 Google Colab (UI) + • + 🔎 Google Colab (No UI) +

+ +## Introduction + +Applio is a powerful voice conversion tool focused on simplicity, quality, and performance. Whether you're an artist, developer, or researcher, Applio offers a straightforward platform for high-quality voice transformations. Its flexible design allows for customization through plugins and configurations, catering to a wide range of projects. + +## Getting Started + +### 1. Installation + +Run the installation script based on your operating system: + +- **Windows:** Double-click `run-install.bat`. +- **Linux/macOS:** Execute `run-install.sh`. + +### 2. Running Applio + +Start Applio using: + +- **Windows:** Double-click `run-applio.bat`. +- **Linux/macOS:** Run `run-applio.sh`. + +This launches the Gradio interface in your default browser. + +### 3. Optional: TensorBoard Monitoring + +To monitor training or visualize data: + +- **Windows:** Run `run-tensorboard.bat`. +- **Linux/macOS:** Run `run-tensorboard.sh`. + +For more detailed instructions, visit the [documentation](https://docs.applio.org). + +## Commercial Usage + +For commercial use, follow the [MIT license](./LICENSE) and contact us at support@applio.org to ensure ethical use. The use of Applio-generated audio files must comply with applicable copyrights. Consider supporting Applio’s development [through a donation](https://ko-fi.com/iahispano). 
+ +## References + +Applio is made possible thanks to these projects and their references: + +- [gradio-screen-recorder](https://huggingface.co/spaces/gstaff/gradio-screen-recorder) by gstaff +- [rvc-cli](https://github.com/blaisewf/rvc-cli) by blaisewf + +### Contributors + + + + diff --git a/app.py b/app.py index ea69c419bfff9f601e3c75ab6e6e18bd310cffaf..c030d896dfe1b9c66e16948acb4ac28f455e7fce 100644 --- a/app.py +++ b/app.py @@ -1,12 +1,21 @@ -import spaces import gradio as gr import sys import os +import logging + +# Constants +DEFAULT_PORT = 6969 +MAX_PORT_ATTEMPTS = 10 + +# Set up logging +logging.getLogger("uvicorn").setLevel(logging.WARNING) +logging.getLogger("httpx").setLevel(logging.WARNING) +# Add current directory to sys.path now_dir = os.getcwd() sys.path.append(now_dir) -# Tabs +# Import Tabs from tabs.inference.inference import inference_tab from tabs.train.train import train_tab from tabs.extra.extra import extra_tab @@ -24,42 +33,50 @@ from tabs.settings.fake_gpu import fake_gpu_tab, gpu_available, load_fake_gpu from tabs.settings.themes import theme_tab from tabs.settings.precision import precision_tab -# Assets -import assets.themes.loadThemes as loadThemes -from assets.i18n.i18n import I18nAuto -import assets.installation_checker as installation_checker -from assets.discord_presence import RPCManager -from assets.flask.server import start_flask, load_config_flask +# Run prerequisites from core import run_prerequisites_script -# Disable logging -import logging - -logging.getLogger("uvicorn").setLevel(logging.WARNING) -logging.getLogger("httpx").setLevel(logging.WARNING) - run_prerequisites_script(False, True, True, True) +# Initialize i18n +from assets.i18n.i18n import I18nAuto + i18n = I18nAuto() -if load_config_presence() == True: + +# Start Discord presence if enabled +if load_config_presence(): + from assets.discord_presence import RPCManager + RPCManager.start_presence() + +# Check installation +import assets.installation_checker as 
installation_checker + installation_checker.check_installation() -if load_config_flask() == True: +# Start Flask server if enabled +from assets.flask.server import start_flask, load_config_flask + +if load_config_flask(): print("Starting Flask server") start_flask() -my_applio = loadThemes.load_json() -if my_applio: - pass -else: - my_applio = "ParityError/Interstellar" +# Load theme +import assets.themes.loadThemes as loadThemes + +my_applio = loadThemes.load_json() or "ParityError/Interstellar" +# Define Gradio interface with gr.Blocks(theme=my_applio, title="Applio") as Applio: - gr.Markdown("# Applio v3.2.2") + gr.Markdown("# Applio") gr.Markdown( i18n( - "Ultimate voice cloning tool, meticulously optimized for unrivaled power, modularity, and user-friendly experience." + "VITS-based Voice Conversion focused on simplicity, quality and performance." + ) + ) + gr.Markdown( + i18n( + "[Support](https://discord.gg/IAHispano) — [Discord Bot](https://discord.com/oauth2/authorize?client_id=1144714449563955302&permissions=1376674695271&scope=bot%20applications.commands) — [Find Voices](https://applio.org/models) — [GitHub](https://github.com/IAHispano/Applio)" ) ) with gr.Tab(i18n("Inference")): @@ -105,9 +122,8 @@ with gr.Blocks(theme=my_applio, title="Applio") as Applio: restart_tab() -def launch_gradio(): - Applio.launch() - +def launch_gradio(port): + Applio.launch(share=True) if __name__ == "__main__": - launch_gradio() \ No newline at end of file + launch_gradio() diff --git a/assets/Applio.ipynb b/assets/Applio.ipynb index 3d406f84e65c5314479c8ef740ad9bbb74d3d70d..7480fe7310f3de5902666009eb97c0e75d7aa442 100644 --- a/assets/Applio.ipynb +++ b/assets/Applio.ipynb @@ -7,7 +7,7 @@ }, "source": [ "## **Applio**\n", - "Ultimate voice cloning tool, meticulously optimized for unrivaled power, modularity, and user-friendly experience.\n", + "A simple, high-quality voice conversion tool focused on ease of use and performance. 
\n", "\n", "[Support](https://discord.gg/IAHispano) — [Discord Bot](https://discord.com/oauth2/authorize?client_id=1144714449563955302&permissions=1376674695271&scope=bot%20applications.commands) — [Find Voices](https://applio.org/models) — [GitHub](https://github.com/IAHispano/Applio)\n", "\n", @@ -16,10 +16,7 @@ "### **Credits**\n", "- Encryption method: [Hina](https://github.com/hinabl)\n", "- Extra section: [Poopmaster](https://github.com/poiqazwsx)\n", - "- Main development: [Applio Team](https://github.com/IAHispano)\n", - "\n", - "
\n", - "\n" + "- Main development: [Applio Team](https://github.com/IAHispano)" ] }, { @@ -32,16 +29,14 @@ "outputs": [], "source": [ "# @title **Install Applio**\n", - "\n", - "import codecs\n", - "import time\n", "import os\n", - "import csv\n", + "import codecs\n", "import shutil\n", "import tarfile\n", "import subprocess\n", + "\n", "from pathlib import Path\n", - "from datetime import datetime\n", + "from IPython.display import clear_output\n", "\n", "rot_47 = lambda encoded_text: \"\".join(\n", " [\n", @@ -61,67 +56,40 @@ "new_name = rot_47(\"kmjbmvh_hg\")\n", "uioawhd = rot_47(codecs.decode(\"pbbxa://oqbpcj.kwu/QIPqaxivw/Ixxtqw.oqb\", \"rot_13\"))\n", "uyadwa = codecs.decode(\"ncc.cl\", \"rot_13\")\n", - "!git clone --depth 1 $uioawhd --branch 3.2.2 --single-branch\n", - "!mv $org_name $new_name\n", - "%cd $new_name/\n", + "A = \"/content/\" + rot_47(\"Kikpm.ovm.bu\")\n", + "D = \"/\"\n", "\n", - "from IPython.display import clear_output, Javascript\n", + "!git clone --depth 1 $uioawhd $new_name --branch 3.2.4 --single-branch\n", + "%cd $new_name/\n", "\n", "clear_output()\n", "\n", - "E = Exception\n", - "B = print\n", - "\n", - "\n", - "def vidal_setup(ForceIn):\n", - " L = \"Kikpm.ovm.bu\"\n", - " K = \"/content/\"\n", - " C = ForceIn\n", - "\n", - " def F():\n", - " print(\"Installing pip packages...\")\n", - " subprocess.check_call([\"pip\", \"install\", \"-r\", \"requirements.txt\", \"--quiet\"])\n", - "\n", - " A = K + rot_47(L)\n", - " G = K + rot_47(L)\n", + "def vidal_setup():\n", + " A = \"/content/\" + rot_47(\"Kikpm.ovm.bu\")\n", " D = \"/\"\n", " if not os.path.exists(A):\n", " M = os.path.dirname(A)\n", " os.makedirs(M, exist_ok=True)\n", " print(\"No cached install found..\")\n", " try:\n", - " N = rot_47(\n", - " codecs.decode(\n", - " \"pbbxa://pcooqvonikm.kw/QIPqaxivw/Ixxtqw/zmawtdm/uiqv/Kwtij/Xvxcz.biz.oh\",\n", + " N = codecs.decode(\n", + " \"uggcf://uhttvatsnpr.pb/VNUvfcnab/Nccyvb/erfbyir/znva/Raivebzrag/Pbyno/Cache.gne.tm\",\n", " 
\"rot_13\",\n", " )\n", - " )\n", " subprocess.run([\"wget\", \"-O\", A, N])\n", " print(\"Download completed successfully!\")\n", - " except E as H:\n", + " except Exception as H:\n", " print(str(H))\n", " if os.path.exists(A):\n", " os.remove(A)\n", " if Path(A).exists():\n", - " with tarfile.open(G, \"r:gz\") as I:\n", - " for J in I.getmembers():\n", - " O = os.path.join(D, J.name)\n", - " try:\n", - " I.extract(J, D)\n", - " except E as H:\n", - " print(\"Failed to extract a file\")\n", - " C = True\n", - " print(f\"Extraction of {G} to {D} completed.\")\n", + " with tarfile.open(A, \"r:gz\") as I:\n", + " I.extractall(D)\n", + " print(f\"Extraction of {A} to {D} completed.\")\n", " if os.path.exists(A):\n", " os.remove(A)\n", - " if C:\n", - " F()\n", - " C = False\n", - " else:\n", - " F()\n", "\n", - "\n", - "vidal_setup(False)\n", + "vidal_setup()\n", "clear_output()\n", "print(\"Finished installing requirements!\")" ] @@ -143,10 +111,11 @@ "import time\n", "import ipywidgets as widgets\n", "from IPython.display import display\n", + "import os\n", + "os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'\n", "!npm install -g localtunnel\n", "new_name = rot_47(\"kmjbmvh_hg\")\n", - "%cd \"/content/program_ml\"\n", - "uyadwa = codecs.decode(\"ncc.cl\", \"rot_13\")\n", + "%cd $new_name/\n", "share_tunnel = False # @param {type:\"boolean\"}\n", "def start_applio():\n", " if share_tunnel:\n", diff --git a/assets/Applio_Kaggle.ipynb b/assets/Applio_Kaggle.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..353ad28f571935455ff609a0bf4962f819fa4a0a --- /dev/null +++ b/assets/Applio_Kaggle.ipynb @@ -0,0 +1 @@ +{"cells":[{"cell_type":"markdown","metadata":{},"source":["## **Applio**\n","A simple, high-quality voice conversion tool focused on ease of use and performance.\n","\n","[Support](https://discord.gg/IAHispano) — [Discord 
Bot](https://discord.com/oauth2/authorize?client_id=1144714449563955302&permissions=1376674695271&scope=bot%20applications.commands) — [Find Voices](https://applio.org/models) — [GitHub](https://github.com/IAHispano/Applio)\n","\n","
\n","\n","### **Credits**\n","- Encryption method: [Hina](https://github.com/hinabl)\n","- Main development: [Applio Team](https://github.com/IAHispano)"]},{"cell_type":"markdown","metadata":{},"source":["## Install"]},{"cell_type":"code","execution_count":null,"metadata":{"trusted":true},"outputs":[],"source":["import codecs\n","import os\n","import shutil\n","import tarfile\n","import subprocess\n","from pathlib import Path\n","from IPython.display import clear_output, Javascript\n","rot_47 = lambda encoded_text: \"\".join(\n"," [\n"," (\n"," chr(\n"," (ord(c) - (ord(\"a\") if c.islower() else ord(\"A\")) - 47) % 26\n"," + (ord(\"a\") if c.islower() else ord(\"A\"))\n"," )\n"," if c.isalpha()\n"," else c\n"," )\n"," for c in encoded_text\n"," ]\n",")\n","\n","new_name = rot_47(\"kmjbmvh_hg\")\n","findme = rot_47(codecs.decode(\"pbbxa://oqbpcj.kwu/Dqlitvb/qurwg-mtnqvlmz.oqb\", \"rot_13\"))\n","uioawhd = rot_47(codecs.decode(\"pbbxa://oqbpcj.kwu/QIPqaxivw/Ixxtqw.oqb\", \"rot_13\"))\n","!pip install uv\n","!git clone --depth 1 $uioawhd $new_name --branch 3.2.4\n","clear_output()\n","!mkdir -p /kaggle/tmp\n","%cd /kaggle/tmp\n","!uv venv .venv > /dev/null 2>&1\n","def vidal_setup(ForceIn):\n"," def F():\n"," print(\"Installing pip packages...\")\n"," subprocess.check_call([\"uv\", \"pip\", \"install\", \"-r\", \"requirements.txt\", \"--quiet\"])\n","\n"," A = \"/kaggle/working\" + rot_47(\"Kikpm.ovm.bu\")\n"," D = \"/kaggle/tmp\"\n"," if not os.path.exists(A):\n"," M = os.path.dirname(A)\n"," os.makedirs(M, exist_ok=True)\n"," print(\"No cached install found..\")\n"," try:\n"," N = rot_47(codecs.decode(\"pbbxa://pcooqvonikm.kw/QIPqaxivw/Ixxtqw/zmawtdm/uiqv/Mvdqzwumvb/Siootm/SiootmD2.biz.oh?lwevtwil=bzcm\", \"rot_13\"))\n"," subprocess.run([\"wget\",\"-q\" ,\"-O\", A, N])\n"," print(\"Download completed successfully!\")\n"," except Exception as H:\n"," print(str(H))\n"," if os.path.exists(A):\n"," os.remove(A)\n"," if Path(A).exists():\n"," with tarfile.open(A, 
\"r:gz\") as I:\n"," I.extractall(D)\n"," print(f\"Extraction of {A} to {D} completed.\")\n"," if os.path.exists(A):\n"," os.remove(A)\n"," else:\n"," F()\n","\n","vidal_setup(False)\n","%cd /kaggle/working/program_ml\n","!source /kaggle/tmp/.venv/bin/activate; python core.py \"prerequisites\" --pretraineds_v1 \"False\" --pretraineds_v2 \"True\" --models \"True\" --exe \"False\" > /dev/null 2>&1\n","clear_output()\n","print(\"Finished\")"]},{"cell_type":"markdown","metadata":{},"source":["## Setup Ngrok"]},{"cell_type":"code","execution_count":null,"metadata":{"trusted":true},"outputs":[],"source":["#https://dashboard.ngrok.com/get-started/your-authtoken (Token Ngrok)\n","!pip install pyngrok\n","!ngrok config add-authtoken token"]},{"cell_type":"markdown","metadata":{},"source":["## Start"]},{"cell_type":"code","execution_count":null,"metadata":{"trusted":true},"outputs":[],"source":["from pyngrok import ngrok\n","ngrok.kill()\n","%cd /kaggle/working/program_ml\n","os.system(f\". /kaggle/tmp/.venv/bin/activate; imjoy-elfinder --root-dir=/kaggle --port 9876 > /dev/null 2>&1 &\")\n","clear_output()\n","%load_ext tensorboard\n","%tensorboard --logdir logs --port 8077\n","p_tunnel = ngrok.connect(6969)\n","t_tunnel = ngrok.connect(8077)\n","f_tunnel = ngrok.connect(9876)\n","clear_output()\n","print(\"Applio Url:\", p_tunnel.public_url)\n","print(\"Tensorboard Url:\", t_tunnel.public_url)\n","print(\"File Url:\", f_tunnel.public_url)\n","print(\"Save the link for later, this will take a while...\")\n","\n","!source /kaggle/tmp/.venv/bin/activate; python app.py"]},{"cell_type":"markdown","metadata":{"_kg_hide-input":false},"source":["## Extra"]},{"cell_type":"markdown","metadata":{},"source":["## Setup new logs folder format\n","- Put the exact name you put as your Model Name in Applio."]},{"cell_type":"code","execution_count":null,"metadata":{},"outputs":[],"source":["\n","modelname = \"Test\"\n","logs_folder = f\"/kaggle/working/program_ml/logs/\" + 
modelname\n","\n","import os\n","\n","folder_renames = {\n"," \"0_gt_wavs\": \"sliced_audios\",\n"," \"1_16k_wavs\": \"sliced_audios_16k\",\n"," \"2a_f0\": \"f0\",\n"," \"2b-f0nsf\": \"f0_voiced\",\n"," \"3_feature768\": \"v2_extracted\"\n","}\n","\n","def rename_folders(base_path, rename_dict):\n"," for old_name, new_name in rename_dict.items():\n"," old_path = os.path.join(base_path, old_name)\n"," new_path = os.path.join(base_path, new_name)\n"," if os.path.exists(old_path):\n"," os.rename(old_path, new_path)\n"," print(f\"Renamed {old_path} to {new_path}\")\n"," else:\n"," print(f\"Folder {old_path} does not exist\")\n","\n","rename_folders(logs_folder, folder_renames)\n"]}],"metadata":{"kaggle":{"accelerator":"nvidiaTeslaT4","dataSources":[],"dockerImageVersionId":30558,"isGpuEnabled":true,"isInternetEnabled":true,"language":"python","sourceType":"notebook"},"kernelspec":{"display_name":"Python 3","language":"python","name":"python3"},"language_info":{"codemirror_mode":{"name":"ipython","version":3},"file_extension":".py","mimetype":"text/x-python","name":"python","nbconvert_exporter":"python","pygments_lexer":"ipython3","version":"3.10.12"}},"nbformat":4,"nbformat_minor":4} diff --git a/assets/Applio_NoUI.ipynb b/assets/Applio_NoUI.ipynb index c6d14f1d29269560edc5b2db2d25045426392c3a..a75b62309a074dd2773aba6af07b524a096b244a 100644 --- a/assets/Applio_NoUI.ipynb +++ b/assets/Applio_NoUI.ipynb @@ -6,11 +6,18 @@ "id": "0pKllbPyK_BC" }, "source": [ - "# Applio NoUI\n", - "Created by [Blaise](https://github.com/blaise-tk) with [Vidal](https://github.com/Vidalnt) and [Poopmaster](https://github.com/poiqazwsx). Based on [RVC_CLI](https://github.com/blaise-tk/RVC_CLI).\n", + "## **Applio NoUI**\n", + "A simple, high-quality voice conversion tool focused on ease of use and performance. 
\n", "\n", - "- Colab inspired on [RVC v2 Disconnected](https://colab.research.google.com/drive/1XIPCP9ken63S7M6b5ui1b36Cs17sP-NS).\n", - "\n" + "[Support](https://discord.gg/IAHispano) — [Discord Bot](https://discord.com/oauth2/authorize?client_id=1144714449563955302&permissions=1376674695271&scope=bot%20applications.commands) — [Find Voices](https://applio.org/models) — [GitHub](https://github.com/IAHispano/Applio)\n", + "\n", + "
\n", + "\n", + "### **Credits**\n", + "- Encryption method: [Hina](https://github.com/hinabl)\n", + "- Extra section: [Poopmaster](https://github.com/poiqazwsx)\n", + "- Main development: [Applio Team](https://github.com/IAHispano)\n", + "- Colab inspired on [RVC v2 Disconnected](https://colab.research.google.com/drive/1XIPCP9ken63S7M6b5ui1b36Cs17sP-NS)." ] }, { @@ -68,7 +75,7 @@ "outputs": [], "source": [ "# @title Clone\n", - "!git clone https://github.com/IAHispano/Applio --branch 3.2.2 --single-branch\n", + "!git clone https://github.com/IAHispano/Applio --branch 3.2.4 --single-branch\n", "%cd /content/Applio" ] }, @@ -97,54 +104,35 @@ ")\n", "import codecs\n", "import os\n", - "import shutil\n", "import tarfile\n", "import subprocess\n", "from pathlib import Path\n", - "from datetime import datetime\n", - "E = Exception\n", - "B = print\n", - "\n", - "\n", - "def vidal_setup(ForceIn):\n", - " L = \"Kikpm.ovm.bu\"\n", - " K = \"/content/\"\n", - " C = ForceIn\n", - "\n", + "def vidal_setup(C):\n", " def F():\n", " print(\"Installing pip packages...\")\n", " subprocess.check_call([\"pip\", \"install\", \"-r\", \"requirements.txt\", \"--quiet\"])\n", "\n", - " A = K + rot_47(L)\n", - " G = K + rot_47(L)\n", + " A = \"/content/\" + rot_47(\"Kikpm.ovm.bu\")\n", " D = \"/\"\n", " if not os.path.exists(A):\n", " M = os.path.dirname(A)\n", " os.makedirs(M, exist_ok=True)\n", " print(\"No cached install found..\")\n", " try:\n", - " N = rot_47(\n", - " codecs.decode(\n", - " \"pbbxa://pcooqvonikm.kw/QIPqaxivw/Ixxtqw/zmawtdm/uiqv/Kwtij/Xvxcz.biz.oh\",\n", + " N = codecs.decode(\n", + " \"uggcf://uhttvatsnpr.pb/VNUvfcnab/Nccyvb/erfbyir/znva/Raivebzrag/Pbyno/Cache.gne.tm\",\n", " \"rot_13\",\n", " )\n", - " )\n", " subprocess.run([\"wget\", \"-O\", A, N])\n", " print(\"Download completed successfully!\")\n", - " except E as H:\n", + " except Exception as H:\n", " print(str(H))\n", " if os.path.exists(A):\n", " os.remove(A)\n", " if Path(A).exists():\n", - " with 
tarfile.open(G, \"r:gz\") as I:\n", - " for J in I.getmembers():\n", - " O = os.path.join(D, J.name)\n", - " try:\n", - " I.extract(J, D)\n", - " except E as H:\n", - " print(\"Failed to extract a file\")\n", - " C = True\n", - " print(f\"Extraction of {G} to {D} completed.\")\n", + " with tarfile.open(A, \"r:gz\") as I:\n", + " I.extractall(D)\n", + " print(f\"Extraction of {A} to {D} completed.\")\n", " if os.path.exists(A):\n", " os.remove(A)\n", " if C:\n", @@ -242,8 +230,11 @@ "split_audio = False # @param{type:\"boolean\"}\n", "clean_audio = False # @param{type:\"boolean\"}\n", "f0_autotune = False # @param{type:\"boolean\"}\n", + "formant_shift = False # @param{type:\"boolean\"}\n", + "formant_qfrency = 1.0 # @param {type:\"slider\", min:1.0, max:16.0, step:0.1}\n", + "formant_timbre = 1.0 # @param {type:\"slider\", min:1.0, max:16.0, step:0.1}\n", "\n", - "!python core.py infer --pitch \"{f0_up_key}\" --filter_radius \"{filter_radius}\" --volume_envelope \"{rms_mix_rate}\" --index_rate \"{index_rate}\" --hop_length \"{hop_length}\" --protect \"{protect}\" --f0_autotune \"{f0_autotune}\" --f0_method \"{f0_method}\" --input_path \"{input_path}\" --output_path \"{output_path}\" --pth_path \"{pth_file}\" --index_path \"{index_file}\" --split_audio \"{split_audio}\" --clean_audio \"{clean_audio}\" --clean_strength \"{clean_strength}\" --export_format \"{export_format}\"\n", + "!python core.py infer --pitch \"{f0_up_key}\" --filter_radius \"{filter_radius}\" --volume_envelope \"{rms_mix_rate}\" --index_rate \"{index_rate}\" --hop_length \"{hop_length}\" --protect \"{protect}\" --f0_autotune \"{f0_autotune}\" --f0_method \"{f0_method}\" --input_path \"{input_path}\" --output_path \"{output_path}\" --pth_path \"{pth_file}\" --index_path \"{index_file}\" --split_audio \"{split_audio}\" --clean_audio \"{clean_audio}\" --clean_strength \"{clean_strength}\" --export_format \"{export_format}\" --formant_shifting \"{formant_shift}\" --formant_qfrency 
\"{formant_qfrency}\" --formant_timbre \"{formant_timbre}\"\n", "\n", "from IPython.display import Audio, display, clear_output\n", "\n", @@ -271,14 +262,17 @@ "outputs": [], "source": [ "# @title Preprocess Dataset\n", + "import os\n", + "os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'\n", "model_name = \"Darwin\" # @param {type:\"string\"}\n", "dataset_path = \"/content/drive/MyDrive/Darwin_Dataset\" # @param {type:\"string\"}\n", "\n", "sample_rate = \"40k\" # @param [\"32k\", \"40k\", \"48k\"] {allow-input: false}\n", "sr = int(sample_rate.rstrip(\"k\")) * 1000\n", "cpu_cores = 2 # @param {type:\"slider\", min:1, max:2, step:1}\n", + "cut_preprocess = True # @param{type:\"boolean\"}\n", "\n", - "!python core.py preprocess --model_name \"{model_name}\" --dataset_path \"{dataset_path}\" --sample_rate \"{sr}\" --cpu_cores \"{cpu_cores}\"" + "!python core.py preprocess --model_name \"{model_name}\" --dataset_path \"{dataset_path}\" --sample_rate \"{sr}\" --cpu_cores \"{cpu_cores}\" --cut_preprocess \"{cut_preprocess}\"" ] }, { @@ -321,19 +315,15 @@ "import time\n", "\n", "LOGS_FOLDER = \"/content/Applio/logs/\"\n", - "WEIGHTS_FOLDER = LOGS_FOLDER + model_name\n", "GOOGLE_DRIVE_PATH = \"/content/drive/MyDrive/RVC_Backup\"\n", "\n", "\n", "def import_google_drive_backup():\n", " print(\"Importing Google Drive backup...\")\n", - " weights_exist = False\n", " for root, dirs, files in os.walk(GOOGLE_DRIVE_PATH):\n", " for filename in files:\n", " filepath = os.path.join(root, filename)\n", - " if os.path.isfile(filepath) and not filepath.startswith(\n", - " os.path.join(GOOGLE_DRIVE_PATH, \"weights\")\n", - " ):\n", + " if os.path.isfile(filepath):\n", " backup_filepath = os.path.join(\n", " LOGS_FOLDER, os.path.relpath(filepath, GOOGLE_DRIVE_PATH)\n", " )\n", @@ -341,28 +331,8 @@ " if not os.path.exists(backup_folderpath):\n", " os.makedirs(backup_folderpath)\n", " print(f\"Created backup folder: {backup_folderpath}\", flush=True)\n", - " shutil.copy2(filepath, 
backup_filepath) # copy file with metadata\n", + " shutil.copy2(filepath, backup_filepath)\n", " print(f\"Imported file from Google Drive backup: {filename}\")\n", - " elif filepath.startswith(\n", - " os.path.join(GOOGLE_DRIVE_PATH, \"weights\")\n", - " ) and filename.endswith(\".pth\"):\n", - " weights_exist = True\n", - " weights_filepath = os.path.join(\n", - " WEIGHTS_FOLDER,\n", - " os.path.relpath(\n", - " filepath, os.path.join(GOOGLE_DRIVE_PATH, \"weights\")\n", - " ),\n", - " )\n", - " weights_folderpath = os.path.dirname(weights_filepath)\n", - " if not os.path.exists(weights_folderpath):\n", - " os.makedirs(weights_folderpath)\n", - " print(f\"Created weights folder: {weights_folderpath}\", flush=True)\n", - " shutil.copy2(filepath, weights_filepath) # copy file with metadata\n", - " print(f\"Imported file from weights: {filename}\")\n", - " if weights_exist:\n", - " print(\"Copied weights from Google Drive backup to local weights folder.\")\n", - " else:\n", - " print(\"No weights found in Google Drive backup.\")\n", " print(\"Google Drive backup import completed.\")\n", "\n", "\n", @@ -374,31 +344,6 @@ " return hash_md5.hexdigest()\n", "\n", "\n", - "def copy_weights_folder_to_drive():\n", - " destination_folder = os.path.join(GOOGLE_DRIVE_PATH, \"weights\")\n", - " try:\n", - " if not os.path.exists(destination_folder):\n", - " os.makedirs(destination_folder)\n", - "\n", - " num_copied = 0\n", - " for filename in os.listdir(WEIGHTS_FOLDER):\n", - " if filename.endswith(\".pth\"):\n", - " source_file = os.path.join(WEIGHTS_FOLDER, filename)\n", - " destination_file = os.path.join(destination_folder, filename)\n", - " if not os.path.exists(destination_file):\n", - " shutil.copy2(source_file, destination_file)\n", - " num_copied += 1\n", - " print(f\"Copied {filename} to Google Drive!\")\n", - "\n", - " if num_copied == 0:\n", - " print(\"No new finished models found for copying.\")\n", - " else:\n", - " print(f\"Finished copying {num_copied} files to 
Google Drive!\")\n", - "\n", - " except Exception as error:\n", - " print(f\"An error occurred during copying weights to Google Drive: {str(error)}\")\n", - "\n", - "\n", "if \"autobackups\" not in globals():\n", " autobackups = False\n", "\n", @@ -422,7 +367,6 @@ " pass\n", "\n", " for root, dirs, files in os.walk(LOGS_FOLDER):\n", - " # Excluding \"zips\" directory\n", " if \"zips\" in dirs:\n", " dirs.remove(\"zips\")\n", " if \"mute\" in dirs:\n", @@ -493,7 +437,7 @@ " print(\"Autobackup Disabled\")\n", "else:\n", " autobackups = True\n", - " print(\"Autobackup Enabled\")\n", + " print(\"Autobackup Enabled\") \n", "\n", "total_epoch = 800 # @param {type:\"integer\"}\n", "batch_size = 15 # @param {type:\"slider\", min:1, max:25, step:0}\n", @@ -557,7 +501,8 @@ "outputs": [], "source": [ "# @title Generate index file\n", - "!python core.py index --model_name \"{model_name}\" --rvc_version \"{rvc_version}\"" + "index_algorithm = \"Auto\" # @param [\"Auto\", \"Faiss\", \"KMeans\"] {allow-input: false}\n", + "!python core.py index --model_name \"{model_name}\" --rvc_version \"{rvc_version}\" --index_algorithm \"{index_algorithm}\"" ] }, { @@ -572,7 +517,7 @@ "# @title Save model\n", "# @markdown Enter the name of the model and the steps. 
You can find it in your `/content/Applio/logs` folder.\n", "%cd /content\n", - "import shutil, os\n", + "import os, shutil, sys\n", "\n", "model_name = \"Darwin\" # @param {type:\"string\"}\n", "model_epoch = 800 # @param {type:\"integer\"}\n", @@ -581,14 +526,27 @@ "if os.path.exists(\"/content/zips\"):\n", " shutil.rmtree(\"/content/zips\")\n", "print(\"Removed zips.\")\n", - "!mkdir -p /content/zips/{model_name}/\n", + "\n", + "os.makedirs(f\"/content/zips/{model_name}/\", exist_ok=True)\n", "print(\"Created zips.\")\n", - "if f\"{model_name}.pth\" not in os.listdir(f\"/content/Applio/weights\"):\n", + "\n", + "logs_folder = f\"/content/Applio/logs/{model_name}/\"\n", + "weight_file = None\n", + "if not os.path.exists(logs_folder):\n", + " print(f\"Model folder not found.\")\n", + " sys.exit(\"\")\n", + "\n", + "for filename in os.listdir(logs_folder):\n", + " if filename.startswith(f\"{model_name}_{model_epoch}e\") and filename.endswith(\".pth\"):\n", + " weight_file = filename\n", + " break\n", + "if weight_file is None:\n", " print(\"There is no weight file with that name\")\n", + " sys.exit(\"\")\n", "if not save_big_file:\n", - " !cp /content/Applio/logs/{model_name}/added_*.index /content/zips/{model_name}/\n", - " !cp /content/Applio/logs/{model_name}/total_*.npy /content/zips/{model_name}/\n", - " !cp /content/Applio/weights/{model_name}.pth /content/zips/{model_name}/{model_name}{model_epoch}.pth\n", + " !cp {logs_folder}added_*.index /content/zips/{model_name}/\n", + " !cp {logs_folder}total_*.npy /content/zips/{model_name}/\n", + " !cp {logs_folder}{weight_file} /content/zips/{model_name}/\n", " %cd /content/zips\n", " !zip -r {model_name}.zip {model_name}\n", "if save_big_file:\n", @@ -609,16 +567,13 @@ " !zip -r {ZIPFILEPATH} {os.path.join(logs_folder, filename)}\n", " else:\n", " !zip -r {ZIPFILEPATH} {os.path.join(logs_folder, filename)}\n", - " for filename in os.listdir(\"./weights\"):\n", - " if model_name in filename:\n", - " !zip -r 
{ZIPFILEPATH} {os.path.join('./weights/', filename)}\n", "\n", "!mkdir -p /content/drive/MyDrive/RVC_Backup/\n", "shutil.move(\n", " f\"/content/zips/{model_name}.zip\",\n", " f\"/content/drive/MyDrive/RVC_Backup/{model_name}.zip\",\n", ")\n", - "%cd /content\n", + "%cd /content/Applio\n", "shutil.rmtree(\"/content/zips\")" ] }, @@ -702,4 +657,4 @@ }, "nbformat": 4, "nbformat_minor": 0 -} \ No newline at end of file +} diff --git a/assets/config.json b/assets/config.json index a55f46bbdd01aa3ce9fbba7710fded08e097df62..a53213c4a3ed07e38add965a83b180dd1f22dd15 100644 --- a/assets/config.json +++ b/assets/config.json @@ -1,15 +1,16 @@ { - "theme": { - "file": "Applio.py", - "class": "Applio" - }, - "plugins": [], - "discord_presence": false, - "lang": { - "override": false, - "selected_lang": "en_US" - }, - "flask_server": false, - "version": "3.2.2", - "fake_gpu": false + "theme": { + "file": "Applio.py", + "class": "Applio" + }, + "plugins": [], + "discord_presence": true, + "lang": { + "override": false, + "selected_lang": "en_US" + }, + "flask_server": false, + "version": "3.2.5", + "fake_gpu": false, + "model_author": "None" } \ No newline at end of file diff --git a/assets/formant_shift/f2m.json b/assets/formant_shift/f2m.json new file mode 100644 index 0000000000000000000000000000000000000000..895b95326353849269a45342711f0e5e9fae4269 --- /dev/null +++ b/assets/formant_shift/f2m.json @@ -0,0 +1,4 @@ +{ + "formant_qfrency": 1.0, + "formant_timbre": 0.8 +} diff --git a/assets/formant_shift/m2f.json b/assets/formant_shift/m2f.json new file mode 100644 index 0000000000000000000000000000000000000000..24a2274c02846b4990f22eed3c1704100106abf0 --- /dev/null +++ b/assets/formant_shift/m2f.json @@ -0,0 +1,4 @@ +{ + "formant_qfrency": 1.0, + "formant_timbre": 1.2 +} diff --git a/assets/formant_shift/random.json b/assets/formant_shift/random.json new file mode 100644 index 0000000000000000000000000000000000000000..f673d915c52acd26f1b9c639a0a6089703a8721f --- /dev/null +++ 
b/assets/formant_shift/random.json @@ -0,0 +1,4 @@ +{ + "formant_qfrency": 32.0, + "formant_timbre": 9.8 +} diff --git a/assets/i18n/languages/ar_AR.json b/assets/i18n/languages/ar_AR.json index 053bea4fd473c3097234849847dfc9e39634ec89..ee9d7cce4664d2d8ac83d460e5bb257a1df2e218 100644 --- a/assets/i18n/languages/ar_AR.json +++ b/assets/i18n/languages/ar_AR.json @@ -1,5 +1,5 @@ { - "Ultimate voice cloning tool, meticulously optimized for unrivaled power, modularity, and user-friendly experience.": "أداة استنساخ الصوت النهائية ، محسنة بدقة للحصول على قوة لا مثيل لها ، ونمطية ، وتجربة سهلة الاستخدام.", + "VITS-based Voice Conversion focused on simplicity, quality and performance.": "أداة استنساخ الصوت النهائية ، محسنة بدقة للحصول على قوة لا مثيل لها ، ونمطية ، وتجربة سهلة الاستخدام.", "This section contains some extra utilities that often may be in experimental phases.": "يحتوي هذا القسم على بعض الأدوات المساعدة الإضافية التي قد تكون غالبا في المراحل التجريبية.", "Output Information": "معلومات الإخراج", "The output information will be displayed here.": "سيتم عرض معلومات الإخراج هنا.", @@ -30,8 +30,8 @@ "Enter dataset path": "إدخال مسار مجموعة البيانات", "Sampling Rate": "معدل أخذ العينات", "The sampling rate of the audio files.": "معدل أخذ العينات من الملفات الصوتية.", - "RVC Version": "نسخة RVC", - "The RVC version of the model.": "نسخة RVC من النموذج.", + "Model Architecture": "نسخة RVC", + "Version of the model architecture.": "نسخة RVC من النموذج.", "Preprocess Dataset": "مجموعة بيانات ما قبل المعالجة", "Extract": "استخرج", "Hop Length": "طول القفزة", diff --git a/assets/i18n/languages/bn_BN.json b/assets/i18n/languages/bn_BN.json index d4fd6afb9eb3bda9eb38ada01149b27cb953593d..22cb83aa3a677834dc2b05d7280df1759d1a3aed 100644 --- a/assets/i18n/languages/bn_BN.json +++ b/assets/i18n/languages/bn_BN.json @@ -1,5 +1,5 @@ { - "Ultimate voice cloning tool, meticulously optimized for unrivaled power, modularity, and user-friendly experience.": "আলটিমেট ভয়েস ক্লোনিং টুল, 
অতুলনীয় শক্তি, মডুলারিটি এবং ব্যবহারকারী-বান্ধব অভিজ্ঞতার জন্য নিখুঁতভাবে অপ্টিমাইজ করা।", + "VITS-based Voice Conversion focused on simplicity, quality and performance.": "আলটিমেট ভয়েস ক্লোনিং টুল, অতুলনীয় শক্তি, মডুলারিটি এবং ব্যবহারকারী-বান্ধব অভিজ্ঞতার জন্য নিখুঁতভাবে অপ্টিমাইজ করা।", "This section contains some extra utilities that often may be in experimental phases.": "এই বিভাগে কিছু অতিরিক্ত ইউটিলিটি রয়েছে যা প্রায়শই পরীক্ষামূলক পর্যায়ে থাকতে পারে।", "Output Information": "আউটপুট তথ্য", "The output information will be displayed here.": "আউটপুট তথ্য এখানে প্রদর্শিত হবে।", @@ -30,8 +30,8 @@ "Enter dataset path": "ডেটাসেটের পথ লিখুন", "Sampling Rate": "নমুনা হার", "The sampling rate of the audio files.": "অডিও ফাইলের নমুনা হার।", - "RVC Version": "আরভিসি সংস্করণ", - "The RVC version of the model.": "মডেলটির আরভিসি সংস্করণ।", + "Model Architecture": "আরভিসি সংস্করণ", + "Version of the model architecture.": "মডেলটির আরভিসি সংস্করণ।", "Preprocess Dataset": "প্রিপ্রসেস ডেটাসেট", "Extract": "নিষ্কাশন", "Hop Length": "হপ দৈর্ঘ্য", diff --git a/assets/i18n/languages/de_DE.json b/assets/i18n/languages/de_DE.json index 5da6f4adec3ca6b1e722a3ff6044bfd8b5f1e4ae..74553d4f9bce89cebf67aef5246bda1b2c7a7de1 100644 --- a/assets/i18n/languages/de_DE.json +++ b/assets/i18n/languages/de_DE.json @@ -1,5 +1,5 @@ { - "Ultimate voice cloning tool, meticulously optimized for unrivaled power, modularity, and user-friendly experience.": "Ultimatives Tool zum Klonen von Stimmen, das sorgfältig für unübertroffene Leistung, Modularität und Benutzerfreundlichkeit optimiert wurde.", + "VITS-based Voice Conversion focused on simplicity, quality and performance.": "Ultimatives Tool zum Klonen von Stimmen, das sorgfältig für unübertroffene Leistung, Modularität und Benutzerfreundlichkeit optimiert wurde.", "This section contains some extra utilities that often may be in experimental phases.": "Dieser Abschnitt enthält einige zusätzliche Dienstprogramme, die sich häufig in experimentellen 
Phasen befinden.", "Output Information": "Informationen zur Ausgabe", "The output information will be displayed here.": "Hier werden die Ausgabeinformationen angezeigt.", @@ -30,8 +30,8 @@ "Enter dataset path": "Datensatz-Pfad eingeben", "Sampling Rate": "Samplingrate", "The sampling rate of the audio files.": "Die Samplingrate der Audiodateien.", - "RVC Version": "RVC-Version", - "The RVC version of the model.": "Die RVC-Version des Modells.", + "Model Architecture": "RVC-Version", + "Version of the model architecture.": "Die RVC-Version des Modells.", "Preprocess Dataset": "Datensatz vorverarbeiten", "Extract": "Extrahieren", "Hop Length": "Sprungweite", diff --git a/assets/i18n/languages/en_US.json b/assets/i18n/languages/en_US.json index 675476e29e097b9021b8445087b06320ed8b7272..50f16502c26f80f8f8eb9fb7b0a977deeafedb94 100644 --- a/assets/i18n/languages/en_US.json +++ b/assets/i18n/languages/en_US.json @@ -1,9 +1,8 @@ { - "Ultimate voice cloning tool, meticulously optimized for unrivaled power, modularity, and user-friendly experience.": "Ultimate voice cloning tool, meticulously optimized for unrivaled power, modularity, and user-friendly experience.", + "VITS-based Voice Conversion focused on simplicity, quality and performance.": "VITS-based Voice Conversion focused on simplicity, quality and performance.", "This section contains some extra utilities that often may be in experimental phases.": "This section contains some extra utilities that often may be in experimental phases.", "Output Information": "Output Information", "The output information will be displayed here.": "The output information will be displayed here.", - "Inference": "Inference", "Train": "Train", "Extra": "Extra", @@ -15,8 +14,10 @@ "Download": "Download", "Report a Bug": "Report a Bug", "Settings": "Settings", - "Preprocess": "Preprocess", + "Audio cutting": "Audio cutting", + "It's recommended to deactivate this option if your dataset has already been processed.": "It's recommended to 
deactivate this option if your dataset has already been processed.", + "Process effects": "Process effects", "Model Name": "Model Name", "Name of the new model.": "Name of the new model.", "Enter model name": "Enter model name", @@ -32,10 +33,9 @@ "Enter dataset path": "Enter dataset path", "Sampling Rate": "Sampling Rate", "The sampling rate of the audio files.": "The sampling rate of the audio files.", - "RVC Version": "RVC Version", - "The RVC version of the model.": "The RVC version of the model.", + "Model Architecture": "Model Architecture", + "Version of the model architecture.": "Version of the model architecture.", "Preprocess Dataset": "Preprocess Dataset", - "Embedder Model": "Embedder Model", "Model used for learning speaker embedding.": "Model used for learning speaker embedding.", "Extract": "Extract", @@ -66,12 +66,18 @@ "GPU Custom Settings": "GPU Custom Settings", "GPU Number": "GPU Number", "0 to ∞ separated by -": "0 to ∞ separated by -", + "The GPU information will be displayed here.": "The GPU information will be displayed here.", + "Specify the number of GPUs you wish to utilize for preprocess by entering them separated by hyphens (-). At the moment, using multi-gpu will not have a significant effect.": "Specify the number of GPUs you wish to utilize for preprocess by entering them separated by hyphens (-). At the moment, using multi-gpu will not have a significant effect.", + "Specify the number of GPUs you wish to utilize for extracting by entering them separated by hyphens (-).": "Specify the number of GPUs you wish to utilize for extracting by entering them separated by hyphens (-).", + "The number of CPU cores to use in the preprocess. The default setting are your cpu cores, which is recommended for most cases.": "The number of CPU cores to use in the preprocess. The default setting are your cpu cores, which is recommended for most cases.", + "The number of CPU cores to use in the extraction process. 
The default setting are your cpu cores, which is recommended for most cases.": "The number of CPU cores to use in the extraction process. The default setting are your cpu cores, which is recommended for most cases.", "GPU Information": "GPU Information", "Pitch Guidance": "Pitch Guidance", "By employing pitch guidance, it becomes feasible to mirror the intonation of the original voice, including its pitch. This feature is particularly valuable for singing and other scenarios where preserving the original melody or pitch pattern is essential.": "By employing pitch guidance, it becomes feasible to mirror the intonation of the original voice, including its pitch. This feature is particularly valuable for singing and other scenarios where preserving the original melody or pitch pattern is essential.", "Utilize pretrained models when training your own. This approach reduces training duration and enhances overall quality.": "Utilize pretrained models when training your own. This approach reduces training duration and enhances overall quality.", "Extract Features": "Extract Features", "We prioritize running the model extraction on the GPU for faster performance. If you prefer to use the CPU, simply leave the GPU field blank.": "We prioritize running the model extraction on the GPU for faster performance. If you prefer to use the CPU, simply leave the GPU field blank.", + "We prioritize running the model preprocessing on the GPU for faster performance. If you prefer to use the CPU, simply leave the GPU field blank.": "We prioritize running the model preprocessing on the GPU for faster performance. 
If you prefer to use the CPU, simply leave the GPU field blank.", "Overtraining Detector": "Overtraining Detector", "Detect overtraining to prevent the model from learning the training data too well and losing the ability to generalize to new data.": "Detect overtraining to prevent the model from learning the training data too well and losing the ability to generalize to new data.", "Overtraining Detector Settings": "Overtraining Detector Settings", @@ -79,11 +85,9 @@ "Set the maximum number of epochs you want your model to stop training if no improvement is detected.": "Set the maximum number of epochs you want your model to stop training if no improvement is detected.", "Sync Graph": "Sync Graph", "Synchronize the graph of the tensorbaord. Only enable this setting if you are training a new model.": "Synchronize the graph of the tensorbaord. Only enable this setting if you are training a new model.", - "Start Training": "Start Training", - "Stop Training & Restart Applio": "Stop Training & Restart Applio", + "Stop Training": "Stop Training", "Generate Index": "Generate Index", - "Export Model": "Export Model", "The button 'Upload' is only for google colab: Uploads the exported files to the ApplioExported folder in your Google Drive.": "The button 'Upload' is only for google colab: Uploads the exported files to the ApplioExported folder in your Google Drive.", "Exported Pth file": "Exported Pth file", @@ -91,14 +95,12 @@ "Select the pth file to be exported": "Select the pth file to be exported", "Select the index file to be exported": "Select the index file to be exported", "Upload": "Upload", - "Voice Model": "Voice Model", "Select the voice model to use for the conversion.": "Select the voice model to use for the conversion.", "Index File": "Index File", "Select the index file to use for the conversion.": "Select the index file to use for the conversion.", "Refresh": "Refresh", "Unload Voice": "Unload Voice", - "Single": "Single", "Upload Audio": "Upload Audio", 
"Select Audio": "Select Audio", @@ -118,6 +120,13 @@ "Upscale Audio": "Upscale Audio", "Upscale the audio to a higher quality, recommended for low-quality audios. (It could take longer to process the audio)": "Upscale the audio to a higher quality, recommended for low-quality audios. (It could take longer to process the audio)", "Set the clean-up level to the audio you want, the more you increase it the more it will clean up, but it is possible that the audio will be more compressed.": "Set the clean-up level to the audio you want, the more you increase it the more it will clean up, but it is possible that the audio will be more compressed.", + "Formant Shifting": "Formant Shifting", + "Enable formant shifting. Used for male to female and vice-versa convertions.": "Enable formant shifting. Used for male to female and vice-versa convertions.", + "Browse presets for formanting": "Browse presets for formanting", + "Presets are located in /assets/formant_shift folder": "Presets are located in /assets/formant_shift folder", + "Default value is 1.0": "Default value is 1.0", + "Quefrency for formant shifting": "Quefrency for formant shifting", + "Timbre for formant shifting": "Timbre for formant shifting", "Pitch": "Pitch", "Set the pitch of the audio, the higher the value, the higher the pitch.": "Set the pitch of the audio, the higher the value, the higher the pitch.", "Filter Radius": "Filter Radius", @@ -130,10 +139,8 @@ "Safeguard distinct consonants and breathing sounds to prevent electro-acoustic tearing and other artifacts. Pulling the parameter to its maximum value of 0.5 offers comprehensive protection. However, reducing this value might decrease the extent of protection while potentially mitigating the indexing effect.": "Safeguard distinct consonants and breathing sounds to prevent electro-acoustic tearing and other artifacts. Pulling the parameter to its maximum value of 0.5 offers comprehensive protection. 
However, reducing this value might decrease the extent of protection while potentially mitigating the indexing effect.", "Pitch extraction algorithm": "Pitch extraction algorithm", "Pitch extraction algorithm to use for the audio conversion. The default algorithm is rmvpe, which is recommended for most cases.": "Pitch extraction algorithm to use for the audio conversion. The default algorithm is rmvpe, which is recommended for most cases.", - "Convert": "Convert", "Export Audio": "Export Audio", - "Batch": "Batch", "Input Folder": "Input Folder", "Select the folder containing the audios to convert.": "Select the folder containing the audios to convert.", @@ -141,9 +148,7 @@ "Output Folder": "Output Folder", "Select the folder where the output audios will be saved.": "Select the folder where the output audios will be saved.", "Enter output path": "Enter output path", - "Get information about the audio": "Get information about the audio", - "## Voice Blender": "## Voice Blender", "Select two voice models, set your desired blend percentage, and blend them into an entirely new voice.": "Select two voice models, set your desired blend percentage, and blend them into an entirely new voice.", "Voice Blender": "Voice Blender", @@ -152,7 +157,6 @@ "Blend Ratio": "Blend Ratio", "Adjusting the position more towards one side or the other will make the model more similar to the first or second.": "Adjusting the position more towards one side or the other will make the model more similar to the first or second.", "Fusion": "Fusion", - "Path to Model": "Path to Model", "Enter path to model": "Enter path to model", "Model information to be placed": "Model information to be placed", @@ -165,23 +169,18 @@ "Model conversion": "Model conversion", "Pth file": "Pth file", "Output of the pth file": "Output of the pth file", - "Extract F0 Curve": "Extract F0 Curve", "The f0 curve represents the variations in the base frequency of a voice over time, showing how pitch rises and falls.": 
"The f0 curve represents the variations in the base frequency of a voice over time, showing how pitch rises and falls.", - "# How to Report an Issue on GitHub": "# How to Report an Issue on GitHub", "1. Click on the 'Record Screen' button below to start recording the issue you are experiencing.": "1. Click on the 'Record Screen' button below to start recording the issue you are experiencing.", "2. Once you have finished recording the issue, click on the 'Stop Recording' button (the same button, but the label changes depending on whether you are actively recording or not).": "2. Once you have finished recording the issue, click on the 'Stop Recording' button (the same button, but the label changes depending on whether you are actively recording or not).", "3. Go to [GitHub Issues](https://github.com/IAHispano/Applio/issues) and click on the 'New Issue' button.": "3. Go to [GitHub Issues](https://github.com/IAHispano/Applio/issues) and click on the 'New Issue' button.", "4. Complete the provided issue template, ensuring to include details as needed, and utilize the assets section to upload the recorded file from the previous step.": "4. 
Complete the provided issue template, ensuring to include details as needed, and utilize the assets section to upload the recorded file from the previous step.", - "Record Screen": "Record Screen", "Record": "Record", "Stop Recording": "Stop Recording", - "Introduce the model .pth path": "Introduce the model .pth path", "See Model Information": "See Model Information", - "## Download Model": "## Download Model", "Model Link": "Model Link", "Introduce the model link": "Introduce the model link", @@ -195,7 +194,6 @@ "## Download Pretrained Models": "## Download Pretrained Models", "Select the pretrained model you want to download.": "Select the pretrained model you want to download.", "And select the sampling rate": "And select the sampling rate.", - "TTS Voices": "TTS Voices", "TTS Speed": "TTS Speed", "Increase or decrease TTS speed.": "Increase or decrease TTS speed.", @@ -206,7 +204,6 @@ "Enter text to synthesize": "Enter text to synthesize", "Output Path for TTS Audio": "Output Path for TTS Audio", "Output Path for RVC Audio": "Output Path for RVC Audio", - "Enable Applio integration with Discord presence": "Enable Applio integration with Discord presence", "It will activate the possibility of displaying the current Applio activity in Discord.": "It will activate the possibility of displaying the current Applio activity in Discord.", "Enable Applio integration with applio.org/models using flask": "Enable Applio integration with applio.org/models using flask", @@ -221,11 +218,91 @@ "Precision": "Precision", "Select the precision you want to use for training and inference.": "Select the precision you want to use for training and inference.", "Update precision": "Update precision", - "Plugin Installer": "Plugin Installer", "Drag your plugin.zip to install it": "Drag your plugin.zip to install it", - "Version Checker": "Version Checker", "Check which version of Applio is the latest to see if you need to update.": "Check which version of Applio is the latest to see 
if you need to update.", - "Check for updates": "Check for updates" -} \ No newline at end of file + "Check for updates": "Check for updates", + "Post-Process": "Post-Process", + "Post-process the audio to apply effects to the output.": "Post-process the audio to apply effects to the output.", + "Reverb": "Reverb", + "Apply reverb to the audio.": "Apply reverb to the audio.", + "Reverb Room Size": "Reverb Room Size", + "Set the room size of the reverb.": "Set the room size of the reverb.", + "Reverb Damping": "Reverb Damping", + "Set the damping of the reverb.": "Set the damping of the reverb.", + "Reverb Wet Gain": "Reverb Wet Gain", + "Set the wet gain of the reverb.": "Set the wet gain of the reverb.", + "Reverb Dry Gain": "Reverb Dry Gain", + "Set the dry gain of the reverb.": "Set the dry gain of the reverb.", + "Reverb Width": "Reverb Width", + "Set the width of the reverb.": "Set the width of the reverb.", + "Reverb Freeze Mode": "Reverb Freeze Mode", + "Set the freeze mode of the reverb.": "Set the freeze mode of the reverb.", + "Pitch Shift": "Pitch Shift", + "Apply pitch shift to the audio.": "Apply pitch shift to the audio.", + "Pitch Shift Semitones": "Pitch Shift Semitones", + "Set the pitch shift semitones.": "Set the pitch shift semitones.", + "Limiter": "Limiter", + "Apply limiter to the audio.": "Apply limiter to the audio.", + "Limiter Threshold dB": "Limiter Threshold dB", + "Set the limiter threshold dB.": "Set the limiter threshold dB.", + "Limiter Release Time": "Limiter Release Time", + "Set the limiter release time.": "Set the limiter release time.", + "Gain": "Gain", + "Apply gain to the audio.": "Apply gain to the audio.", + "Gain dB": "Gain dB", + "Set the gain dB.": "Set the gain dB.", + "Distortion": "Distortion", + "Apply distortion to the audio.": "Apply distortion to the audio.", + "Distortion Gain": "Distortion Gain", + "Set the distortion gain.": "Set the distortion gain.", + "Chorus": "Chorus", + "Apply chorus to the audio.": 
"Apply chorus to the audio.", + "Chorus Rate Hz": "Chorus Rate Hz", + "Set the chorus rate Hz.": "Set the chorus rate Hz.", + "Chorus Depth": "Chorus Depth", + "Set the chorus depth.": "Set the chorus depth.", + "Chorus Center Delay ms": "Chorus Center Delay ms", + "Set the chorus center delay ms.": "Set the chorus center delay ms.", + "Chorus Feedback": "Chorus Feedback", + "Set the chorus feedback.": "Set the chorus feedback.", + "Chorus Mix": "Chorus Mix", + "Set the chorus mix.": "Set the chorus mix.", + "Bitcrush": "Bitcrush", + "Apply bitcrush to the audio.": "Apply bitcrush to the audio.", + "Bitcrush Bit Depth": "Bitcrush Bit Depth", + "Set the bitcrush bit depth.": "Set the bitcrush bit depth.", + "Clipping": "Clipping", + "Apply clipping to the audio.": "Apply clipping to the audio.", + "Clipping Threshold": "Clipping Threshold", + "Set the clipping threshold.": "Set the clipping threshold.", + "Compressor": "Compressor", + "Apply compressor to the audio.": "Apply compressor to the audio.", + "Compressor Threshold dB": "Compressor Threshold dB", + "Set the compressor threshold dB.": "Set the compressor threshold dB.", + "Compressor Ratio": "Compressor Ratio", + "Set the compressor ratio.": "Set the compressor ratio.", + "Compressor Attack ms": "Compressor Attack ms", + "Set the compressor attack ms.": "Set the compressor attack ms.", + "Compressor Release ms": "Compressor Release ms", + "Set the compressor release ms.": "Set the compressor release ms.", + "Delay": "Delay", + "Apply delay to the audio.": "Apply delay to the audio.", + "Delay Seconds": "Delay Seconds", + "Set the delay seconds.": "Set the delay seconds.", + "Delay Feedback": "Delay Feedback", + "Set the delay feedback.": "Set the delay feedback.", + "Delay Mix": "Delay Mix", + "Set the delay mix.": "Set the delay mix.", + "Custom Embedder": "Custom Embedder", + "Select Custom Embedder": "Select Custom Embedder", + "Refresh embedders": "Refresh embedders", + "Folder Name": "Folder Name", + 
"Upload .bin": "Upload .bin", + "Upload .json": "Upload .json", + "Move files to custom embedder folder": "Move files to custom embedder folder", + "model information": "model information", + "Model Creator": "Model Creator", + "Name of the model creator. (Default: Unknown)": "Name of the model creator. (Default: Unknown)" +} diff --git a/assets/i18n/languages/es_ES.json b/assets/i18n/languages/es_ES.json index 3d4259eaea221719fe11f57ee6d90dca4720c629..a980c48cf936d30d351453f9d9476031f3edb932 100644 --- a/assets/i18n/languages/es_ES.json +++ b/assets/i18n/languages/es_ES.json @@ -1,5 +1,5 @@ { - "Ultimate voice cloning tool, meticulously optimized for unrivaled power, modularity, and user-friendly experience.": "La herramienta de clonación de voz definitiva, meticulosamente optimizada para una potencia, modularidad y experiencia de uso inigualables.", + "VITS-based Voice Conversion focused on simplicity, quality and performance.": "La herramienta de clonación de voz definitiva, meticulosamente optimizada para una potencia, modularidad y experiencia de uso inigualables.", "This section contains some extra utilities that often may be in experimental phases.": "Esta sección contiene algunas utilidades adicionales que a menudo pueden estar en fases experimentales.", "Output Information": "Información de salida", "The output information will be displayed here.": "La información de salida se mostrará aquí.", @@ -30,8 +30,8 @@ "Enter dataset path": "Introduzca la ruta del dataset", "Sampling Rate": "Frecuencia de muestreo", "The sampling rate of the audio files.": "La frecuencia de muestreo de los archivos de audio.", - "RVC Version": "Versión RVC", - "The RVC version of the model.": "La versión RVC del modelo.", + "Model Architecture": "Versión RVC", + "Version of the model architecture.": "La versión RVC del modelo.", "Preprocess Dataset": "Preprocesar dataset", "Embedder Model": "Modelo de incrustación", "Model used for learning speaker embedding.": "Modelo utilizado 
para aprender la incrustación del hablante.", diff --git a/assets/i18n/languages/fa_FA.json b/assets/i18n/languages/fa_FA.json index 6bbd84f11dbbe1557e19f4eb9e0a7f94d2a5d72a..3a37d5eb1df663cf1bd1f9b43df6eddf36241e99 100644 --- a/assets/i18n/languages/fa_FA.json +++ b/assets/i18n/languages/fa_FA.json @@ -1,5 +1,5 @@ { - "Ultimate voice cloning tool, meticulously optimized for unrivaled power, modularity, and user-friendly experience.": "ابزار شبیه سازی صدا نهایی، با دقت برای قدرت بی نظیر، مدولار بودن و تجربه کاربر پسند بهینه شده است.", + "VITS-based Voice Conversion focused on simplicity, quality and performance.": "ابزار شبیه سازی صدا نهایی، با دقت برای قدرت بی نظیر، مدولار بودن و تجربه کاربر پسند بهینه شده است.", "This section contains some extra utilities that often may be in experimental phases.": "این بخش شامل برخی از ابزارهای اضافی است که اغلب ممکن است در مراحل ازمایشی باشد.", "Output Information": "اطلاعات خروجی", "The output information will be displayed here.": "اطلاعات خروجی در اینجا نمایش داده خواهد شد.", @@ -30,8 +30,8 @@ "Enter dataset path": "وارد کردن مسیر مجموعه داده ها", "Sampling Rate": "نرخ نمونه برداری", "The sampling rate of the audio files.": "نرخ نمونه برداری از فایل های صوتی.", - "RVC Version": "نسخه RVC", - "The RVC version of the model.": "نسخه RVC از مدل.", + "Model Architecture": "نسخه RVC", + "Version of the model architecture.": "نسخه RVC از مدل.", "Preprocess Dataset": "مجموعه داده پیش پردازش", "Extract": "عصاره", "Hop Length": "طول هاپ", diff --git a/assets/i18n/languages/fr_FR.json b/assets/i18n/languages/fr_FR.json index ec75539fb7da0e86f28c6205adb536b075cdc1a6..107f92f8e12bfca14e5e0d19b6c29a47ba0f8fe8 100644 --- a/assets/i18n/languages/fr_FR.json +++ b/assets/i18n/languages/fr_FR.json @@ -1,5 +1,5 @@ { - "Ultimate voice cloning tool, meticulously optimized for unrivaled power, modularity, and user-friendly experience.": "Outil ultime de clonage vocal, méticuleusement optimisé pour une puissance, une modularité et une expérience 
conviviales inégalées.", + "VITS-based Voice Conversion focused on simplicity, quality and performance.": "Outil ultime de clonage vocal, méticuleusement optimisé pour une puissance, une modularité et une expérience conviviales inégalées.", "This section contains some extra utilities that often may be in experimental phases.": "Cette section contient quelques utilitaires supplémentaires qui peuvent souvent être en phase expérimentale.", "Output Information": "Informations de sortie", "The output information will be displayed here.": "Les informations de sortie seront affichées ici.", @@ -32,8 +32,8 @@ "Enter dataset path": "Entrez le chemin d’accès au jeu de données", "Sampling Rate": "Fréquence d’échantillonnage", "The sampling rate of the audio files.": "Fréquence d’échantillonnage des fichiers audio.", - "RVC Version": "RVC Version", - "The RVC version of the model.": "La version RVC du modèle.", + "Model Architecture": "Model Architecture", + "Version of the model architecture.": "La version RVC du modèle.", "Preprocess Dataset": "Jeu de données de prétraitement", "Extract": "Extraire", "Hop Length": "Longueur du houblon", diff --git a/assets/i18n/languages/gu_GU.json b/assets/i18n/languages/gu_GU.json index bd5e2817802480c81118550b6f9793ccae9ba2ce..aef1885845c8a9815c53b5fcd494a2f85acedf76 100644 --- a/assets/i18n/languages/gu_GU.json +++ b/assets/i18n/languages/gu_GU.json @@ -1,5 +1,5 @@ { - "Ultimate voice cloning tool, meticulously optimized for unrivaled power, modularity, and user-friendly experience.": "અંતિમ વોઇસ ક્લોનિંગ ટૂલ, જે અજોડ શક્તિ, મોડ્યુલારિટી અને વપરાશકર્તા-મૈત્રીપૂર્ણ અનુભવ માટે સાવચેતીપૂર્વક ઓપ્ટિમાઇઝ કરવામાં આવ્યું છે.", + "VITS-based Voice Conversion focused on simplicity, quality and performance.": "અંતિમ વોઇસ ક્લોનિંગ ટૂલ, જે અજોડ શક્તિ, મોડ્યુલારિટી અને વપરાશકર્તા-મૈત્રીપૂર્ણ અનુભવ માટે સાવચેતીપૂર્વક ઓપ્ટિમાઇઝ કરવામાં આવ્યું છે.", "This section contains some extra utilities that often may be in experimental phases.": "આ વિભાગમાં કેટલીક 
વધારાની ઉપયોગિતાઓ છે જે ઘણીવાર પ્રાયોગિક તબક્કામાં હોઈ શકે છે.", "Output Information": "આઉટપુટ જાણકારી", "The output information will be displayed here.": "આઉટપુટ જાણકારી અંહિ દર્શાવવામાં આવશે.", @@ -30,8 +30,8 @@ "Enter dataset path": "ડેટાસેટ પાથને દાખલ કરો", "Sampling Rate": "નમૂનાનો દર", "The sampling rate of the audio files.": "ઓડિયો ફાઈલોનો નમૂનાનો દર.", - "RVC Version": "RVC આવૃત્તિ", - "The RVC version of the model.": "મોડેલનું આરવીસી વર્ઝન.", + "Model Architecture": "RVC આવૃત્તિ", + "Version of the model architecture.": "મોડેલનું આરવીસી વર્ઝન.", "Preprocess Dataset": "પ્રીપ્રોસેસ ડેટાસેટ", "Extract": "અર્ક કાઢો", "Hop Length": "હોપ લંબાઈ", diff --git a/assets/i18n/languages/hi_IN.json b/assets/i18n/languages/hi_IN.json index 345d660279ffdccf8bdb00d6d862ef6e80e4dfa6..affef2a5eb813a9c8c46c5fd94678b030262688e 100644 --- a/assets/i18n/languages/hi_IN.json +++ b/assets/i18n/languages/hi_IN.json @@ -1,5 +1,5 @@ { - "Ultimate voice cloning tool, meticulously optimized for unrivaled power, modularity, and user-friendly experience.": "अल्टीमेट वॉयस क्लोनिंग टूल, बेजोड़ पॉवर, मॉड्यूलरिटी और उपयोगकर्ता-अनुकूल अनुभव के लिए बारीकी से ऑप्टिमाइज़ किया गया।\n[हिन्दी अनुवाद: Enes](https://discord.com/users/1140031358006202468)", + "VITS-based Voice Conversion focused on simplicity, quality and performance.": "अल्टीमेट वॉयस क्लोनिंग टूल, बेजोड़ पॉवर, मॉड्यूलरिटी और उपयोगकर्ता-अनुकूल अनुभव के लिए बारीकी से ऑप्टिमाइज़ किया गया।\n[हिन्दी अनुवाद: Enes](https://discord.com/users/1140031358006202468)", "This section contains some extra utilities that often may be in experimental phases.": "इस खंड में कुछ अतिरिक्त उपयोगिताएँ होती हैं जो अक्सर प्रायोगिक चरणों में हो सकती हैं।", "Output Information": "आउटपुट जानकारी", "The output information will be displayed here.": "आउटपुट जानकारी यहाँ प्रदर्शित की जाएगी।", @@ -32,8 +32,8 @@ "Enter dataset path": "डेटासेट पथ डालें", "Sampling Rate": "नमूनाकरण दर", "The sampling rate of the audio files.": "ऑडियो फ़ाइलों की नमूनाकरण दर।", - "RVC 
Version": "RVC वर्शन", - "The RVC version of the model.": "मॉडल का RVC वर्शन।", + "Model Architecture": "RVC वर्शन", + "Version of the model architecture.": "मॉडल का RVC वर्शन।", "Preprocess Dataset": "डेटासेट का पूर्व-प्रसंस्करण करें", "Embedder Model": "एम्बेडर मॉडल", diff --git a/assets/i18n/languages/hu_HU.json b/assets/i18n/languages/hu_HU.json index 98b058cf1de5786722eacd897e58e679b591a203..388145cdfcd29dc0e0da420a986f1065347e76a9 100644 --- a/assets/i18n/languages/hu_HU.json +++ b/assets/i18n/languages/hu_HU.json @@ -1,5 +1,5 @@ { - "Ultimate voice cloning tool, meticulously optimized for unrivaled power, modularity, and user-friendly experience.": "A legjobb hangklónozó eszköz, aprólékosan optimalizálva a páratlan teljesítmény, a modularitás és a felhasználóbarát élmény érdekében.", + "VITS-based Voice Conversion focused on simplicity, quality and performance.": "A legjobb hangklónozó eszköz, aprólékosan optimalizálva a páratlan teljesítmény, a modularitás és a felhasználóbarát élmény érdekében.", "This section contains some extra utilities that often may be in experimental phases.": "Ez a szakasz néhány további segédprogramot tartalmaz, amelyek gyakran kísérleti fázisban vannak.", "Output Information": "Kimeneti információk", "The output information will be displayed here.": "A kimeneti információk itt jelennek meg.", @@ -30,8 +30,8 @@ "Enter dataset path": "Adja meg az adatkészlet elérési útját", "Sampling Rate": "Mintavételi arány", "The sampling rate of the audio files.": "Az audiofájlok mintavételi frekvenciája.", - "RVC Version": "RVC verzió", - "The RVC version of the model.": "A modell RVC verziója.", + "Model Architecture": "RVC verzió", + "Version of the model architecture.": "A modell RVC verziója.", "Preprocess Dataset": "Adatkészlet előfeldolgozása", "Extract": "Kivonat", "Hop Length": "Komló hossza", diff --git a/assets/i18n/languages/id_ID.json b/assets/i18n/languages/id_ID.json index 
14b0fd95bc03f299424b3b88375fca6d0ff1601a..1d7548c01602869c0f826c10ea5718650f38ba63 100644 --- a/assets/i18n/languages/id_ID.json +++ b/assets/i18n/languages/id_ID.json @@ -1,5 +1,5 @@ { - "Ultimate voice cloning tool, meticulously optimized for unrivaled power, modularity, and user-friendly experience.": "Alat kloning suara terbaik, dioptimalkan secara cermat untuk kekuatan tak tertandingi, modularitas, dan pengalaman ramah pengguna.", + "VITS-based Voice Conversion focused on simplicity, quality and performance.": "Alat kloning suara terbaik, dioptimalkan secara cermat untuk kekuatan tak tertandingi, modularitas, dan pengalaman ramah pengguna.", "This section contains some extra utilities that often may be in experimental phases.": "Bagian ini berisi beberapa utilitas tambahan yang mungkin sering berada dalam tahap percobaan.", "Output Information": "informasi keluaran", @@ -26,7 +26,7 @@ "The audio file has been successfully added to the dataset. Please click the preprocess button.": "Berkas audio telah berhasil ditambahkan ke dataset. 
Silakan klik tombol praproses.", "Enter dataset path": "Masukkan jalur kumpulan data", "Sampling Rate": "Tingkat Pengambilan Sampel", - "RVC Version": "Versi RVC", + "Model Architecture": "Versi RVC", "Preprocess Dataset": "Kumpulan Data Praproses", "Extract": "Ekstrak", diff --git a/assets/i18n/languages/it_IT.json b/assets/i18n/languages/it_IT.json index 561977438522c2c6b1639f7e6949e6bdf73c2d9f..a218d3c9b13df86ca08f536328151bb79617c0c8 100644 --- a/assets/i18n/languages/it_IT.json +++ b/assets/i18n/languages/it_IT.json @@ -1,5 +1,5 @@ { - "Ultimate voice cloning tool, meticulously optimized for unrivaled power, modularity, and user-friendly experience.": "Strumento di clonazione vocale definitivo, meticolosamente ottimizzato per potenza, modularità ed esperienza utente senza rivali.", + "VITS-based Voice Conversion focused on simplicity, quality and performance.": "Strumento di clonazione vocale definitivo, meticolosamente ottimizzato per potenza, modularità ed esperienza utente senza rivali.", "This section contains some extra utilities that often may be in experimental phases.": "Questa sezione contiene alcune utilità aggiuntive che spesso possono essere in fase sperimentale.", "Output Information": "Informazioni sull'output", "The output information will be displayed here.": "Qui verranno visualizzate le informazioni sull'output.", @@ -30,8 +30,8 @@ "Enter dataset path": "Inserisci il percorso del dataset", "Sampling Rate": "Frequenza di campionamento", "The sampling rate of the audio files.": "La frequenza di campionamento dei file audio.", - "RVC Version": "Versione RVC", - "The RVC version of the model.": "La versione RVC del modello.", + "Model Architecture": "Versione RVC", + "Version of the model architecture.": "La versione RVC del modello.", "Preprocess Dataset": "Pre-elabora dataset", "Extract": "Estrai", "Hop Length": "Lunghezza del salto", diff --git a/assets/i18n/languages/ja_JA.json b/assets/i18n/languages/ja_JA.json index 
2473daafb991c14e24c8c1b609b3ac182efca353..289644afa81e89a6c66bc287e211d4cebe6728e6 100644 --- a/assets/i18n/languages/ja_JA.json +++ b/assets/i18n/languages/ja_JA.json @@ -1,5 +1,5 @@ { - "Ultimate voice cloning tool, meticulously optimized for unrivaled power, modularity, and user-friendly experience.": "他の追随を許さないパワー、モジュール性、そしてユーザーフレンドリーな操作性を実現するために綿密に最適化された究極のボイスクローニングツール。", + "VITS-based Voice Conversion focused on simplicity, quality and performance.": "他の追随を許さないパワー、モジュール性、そしてユーザーフレンドリーな操作性を実現するために綿密に最適化された究極のボイスクローニングツール。", "This section contains some extra utilities that often may be in experimental phases.": "このセクションには、多くの場合、実験段階にある可能性のあるいくつかの追加のユーティリティが含まれています。", "Output Information": "出力情報", "The output information will be displayed here.": "出力情報がここに表示されます。", @@ -30,8 +30,8 @@ "Enter dataset path": "データセットのパスを入力", "Sampling Rate": "サンプリングレート", "The sampling rate of the audio files.": "オーディオファイルのサンプリングレートです。", - "RVC Version": "RVC バージョン", - "The RVC version of the model.": "モデルの RVC バージョン。", + "Model Architecture": "RVC バージョン", + "Version of the model architecture.": "モデルの RVC バージョン。", "Preprocess Dataset": "データセットの前処理", "Extract": "抽出", "Hop Length": "ホップ長(Hop Length)", diff --git a/assets/i18n/languages/ko_KO.json b/assets/i18n/languages/ko_KO.json index 36a262e371d43f5737b9bc3f8035820a51108458..b35a6f731c48fb21eea874ffbdd62974d2c0d629 100644 --- a/assets/i18n/languages/ko_KO.json +++ b/assets/i18n/languages/ko_KO.json @@ -1,5 +1,5 @@ { - "Ultimate voice cloning tool, meticulously optimized for unrivaled power, modularity, and user-friendly experience.": "타의 추종을 불허하는 성능, 모듈성 및 사용자 친화적인 경험을 위해 세심하게 최적화된 궁극의 음성 복제 도구입니다.", + "VITS-based Voice Conversion focused on simplicity, quality and performance.": "타의 추종을 불허하는 성능, 모듈성 및 사용자 친화적인 경험을 위해 세심하게 최적화된 궁극의 음성 복제 도구입니다.", "This section contains some extra utilities that often may be in experimental phases.": "이 섹션에는 아직 실험 단계에 있는 추가 유틸리티가 포함되어 있습니다.", "Output Information": "출력 정보", "The output information will 
be displayed here.": "출력 정보가 여기에 표시됩니다.", @@ -30,8 +30,8 @@ "Enter dataset path": "데이터 세트 경로 입력", "Sampling Rate": "샘플링 레이트", "The sampling rate of the audio files.": "오디오 파일의 샘플링 레이트입니다.", - "RVC Version": "RVC 버전", - "The RVC version of the model.": "모델의 RVC 버전입니다.", + "Model Architecture": "RVC 버전", + "Version of the model architecture.": "모델의 RVC 버전입니다.", "Preprocess Dataset": "데이터 세트 사전 처리", "Extract": "추출물", "Hop Length": "홉 길이", diff --git a/assets/i18n/languages/ml_IN.json b/assets/i18n/languages/ml_IN.json index d20de22513a65792558614f31649db13f9f95620..5ebda51e75828954d7ba9e310f149b5cee5b4309 100644 --- a/assets/i18n/languages/ml_IN.json +++ b/assets/i18n/languages/ml_IN.json @@ -1,204 +1,204 @@ -{ - "Ultimate voice cloning tool, meticulously optimized for unrivaled power, modularity, and user-friendly experience.": "അനന്തമായ ശക്തി, മോഡുലാരിറ്റി, ഉപയോക്തൃ-സൗഹൃദ അനുഭവത്തിനായി യാഥാർത്ഥ്യമാക്കിയ അത്യുന്നതമായ വോയ്സ് ക്ലോണിങ് ടൂൾ.\n[മലയാളത്തിലേക്ക് വായന: Enes](https://discord.com/users/1140031358006202468)", - "This section contains some extra utilities that often may be in experimental phases.": "ഈ പേരിലെ കൊണ്ടാടാൻ പ്രയോജനപ്രദമായ എന്നതിനാൽ കഴിഞ്ഞത് സാധാരണയായ പോസിക്കുകളിൽ അവസാനിക്കാത്ത ചില യന്ത്രങ്ങൾ ഉള്ളതാണ്.", - "Output Information": "പ്രണാമം വിവരം", - "The output information will be displayed here.": "ഇവിടെ പ്രണയ വിവരങ്ങൾ പ്രദശിപ്പിക്കപ്പെടും.", - "Inference": "സൂചന", - "Train": "പ്രശിക്ഷണം", - "Extra": "അധികം", - "Merge Audios": "ഓഡിയോ ഒടിക്കുക", - "Processing": "പ്രൊസസ്സിംഗ്", - "Audio Analyzer": "ഓഡിയോ വിശ്ലേഷണകൾ", - "Model Information": "മോഡൽ വിവരങ്ങൾ", - "Plugins": "പ്ലഗിൻസ്", - "Download": "ഡൗൺലോഡ്", - "Report a Bug": "പിശക് റിപ്പോർട്ട്", - "Settings": "സെറ്റിംഗുകൾ", - "Preprocess": "പ്രൊസസ്", - "Model Name": "മോഡൽ പേര്", - "Name of the new model.": "പുതിയ മോഡലിന്റെ പേര്.", - "Enter model name": "മോഡൽ പേര് നൽകുക", - "Dataset Path": "ഡാറ്റാസെറ്റ് പാത", - "Path to the dataset folder.": "ഡാറ്റാസെറ്റ് ഫോൾഡർക്കുള്ള പാത.", - "Refresh Datasets": 
"ഡാറ്റാസെറ്റുകൾ പുനഃസൃഷ്ടിക്കുക", - "Dataset Creator": "ഡാറ്റാസെറ്റ് സൃഷ്ടാവ്", - "Dataset Name": "ഡാറ്റാസെറ്റ് പേര്", - "Name of the new dataset.": "പുതിയ ഡാറ്റാസെറ്റിന്റെ പേര്.", - "Enter dataset name": "ഡാറ്റാസെറ്റ് പേര് നൽകുക", - "Upload Audio Dataset": "ഓഡിയോ ഡാറ്റാസെറ്റ് അപ്‌ലോഡ് ചെയ്യുക", - "The audio file has been successfully added to the dataset. Please click the preprocess button.": "ഓഡിയോ ഫയൽ യഥാർത്ഥമായി ഡാറ്റാസെറ്റിലേക്ക് ചേർന്നു. ദയവായി പ്രൊസെസ് ബട്ടൺ അമർത്തുക.", - "Enter dataset path": "ഡാറ്റാസെറ്റ് പാത നൽകുക", - "Sampling Rate": "സാമ്പ്ലിംഗ് റേറ്റ്", - "The sampling rate of the audio files.": "ഓഡിയോ ഫയലുകളുടെ സാമ്പ്ലിംഗ് റേറ്റ്.", - "RVC Version": "RVC പതിപ്പ്", - "The RVC version of the model.": "മോഡലിന്റെ RVC പതിപ്പ്.", - "Preprocess Dataset": "ഡാറ്റാസെറ്റ് പ്രൊസെസ് ചെയ്യുക", - - "Embedder Model": "Embedder Model", - "Model used for learning speaker embedding.": "സ്പീക്കർ എംബെഡ്ഡിംഗ് പഠിപ്പിക്കാൻ ഉപയോഗിക്കുന്ന മോഡൽ.", - "Extract": "എക്സ്ട്രാക്ട്", - "Hop Length": "ഹോപ്പ് ലെന്ത്", - "Denotes the duration it takes for the system to transition to a significant pitch change. Smaller hop lengths require more time for inference but tend to yield higher pitch accuracy.": "സിസ്റ്റത്തിൽ ഒരു വിശദ പിച്ച് മാറ്റത്തിന്റെ ദാരിദ്ര്യം സൂചിപ്പിക്കുന്നു. ചെറിയ ഹോപ്പ് ലെന്തുകള് ഇൻഫരൻസിനായി കൂടുതൽ സമയം ആവശ്യപ്പെടുന്നു, എന്നിരുന്നാലും ഉയരമായ പിച്ച് അക്ക്യൂറസി നൽകുന്നു.", - "Batch Size": "ബാച്ച് വലുപ്പം", - "It's advisable to align it with the available VRAM of your GPU. A setting of 4 offers improved accuracy but slower processing, while 8 provides faster and standard results.": "നിനക്ക് ലഭ്യമായ GPU-യുടെ VRAM നുസരിച്ച് അലൈൻ ചെയ്യുന്നത് പ്രാധാന്യപ്പെടുന്നു. 
4-ന്റെ സെറ്റിംഗ് മൊത്തം അക്ക്യൂറസി പരിഷ്കർത്തനവും എല്ലാത്തിനെയും അവലംബപ്പെടുന്നു, എന്നിരുന്നാലും 8 വലുപ്പം അനുഭവജീവനത്തിനായി ഉന്നയിക്കുന്നു.", - "Save Every Epoch": "എന്നാൽ എന്റെ എപ്പൊക്കാലിലെയും മോഡൽ സേവ് ചെയ്യുന്നതിനു വരെ", - "Determine at how many epochs the model will be saved at.": "എന്താണ് എപ്പൊക്കാലിൽ മോഡൽ സേവ് ചെയ്യപ്പെടുന്നതെന്ന് തിരഞ്ഞെടുക്കുക.", - "Total Epoch": "മൊത്തം എപ്പൊക്ക്", - "Specifies the overall quantity of epochs for the model training process.": "മോഡൽ പ്രശിക്ഷണ പ്രക്രിയയ്ക്കായി എപ്പൊക്ക് എത്രയാണ് എന്നത് നിര്ദിഷ്ടമാക്കുന്നു.", - "Pretrained": "പ്രീട്രെയിനെഡ്", - "Save Only Latest": "കേവലായി പുതിയത് മാത്രം സേവ് ചെയ്യുക", - "Enabling this setting will result in the G and D files saving only their most recent versions, effectively conserving storage space.": "ഈ സെറ്റിംഗ് പ്രവർത്തനത്തിനായി ക്രമീകരിച്ചാൽ, G മറ്റും D ഫയലുകൾ അവരുടെ അവസാന പതിപ്പുകൾക്ക് മാത്രം സേവ് ചെയ്യും, പ്രഭാവകരമായി സ്റ്റോറേജ് സ്ഥലം സംരക്ഷിക്കുന്നതാണ്.", - "Save Every Weights": "എന്റെ എപ്പൊക്കാലിലും ഭാരം സേവ് ചെയ്യുക", - "This setting enables you to save the weights of the model at the conclusion of each epoch.": "ഈ സെറ്റിംഗ് നിര്ദ്ദേശപ്പെടുന്ന ഓരോ എപ്പൊക്കിലും മോഡലിന്റെ ഭാരങ്ങൾ സേവ് ചെയ്യാൻ കഴിയുന്നു.", - "Custom Pretrained": "കസ്റ്റം പ്രീട്രെയിനെഡ്", - "Utilizing custom pretrained models can lead to superior results, as selecting the most suitable pretrained models tailored to the specific use case can significantly enhance performance.": "കസ്റ്റം പ്രീട്രെയിനെഡ് മോഡലുകൾ ഉപയോഗിക്കുന്നത് ഉന്നത ഫലങ്ങൾ നൽകിയാൽ, സ്പഷ്ടമായ ഉപയോഗ കേസിനായി കൈമേലെ പ്രീട്രെയിനെഡ് മോഡലുകൾ തിരഞ്ഞെടുക്കുന്നത് സാധാരണയായ പ്രദർശനത്തെ വളർത്തുന്നതിനും പ്രദർശനം വളർത്തുന്നതിനും കൂടുതൽ സഹായകമാകും.", - "Upload Pretrained Model": "പ്രീട്രെയിനെഡ് മോഡൽ അപ്ലോഡ് ചെയ്യുക", - "Refresh Custom Pretraineds": "കസ്റ്റം പ്രീട്രെയിനെഡുകൾ പുനഃസ്വന്തമാക്കുക", - "Pretrained Custom Settings": "പ്രീട്രെയിനെഡ് കസ്റ്റം സെറ്റിംഗുകൾ", - "The file you dropped is not a valid pretrained file. 
Please try again.": "നിനക്ക് ഡ്രോപ്പ് ചെയ്യിയ ഫയൽ ഒരു സാധാരണ പ്രീട്രെയിനെഡ് ഫയലല്ല. ദയവായി വീണ്ടും ശ്രയിക്കുക.", - "Click the refresh button to see the pretrained file in the dropdown menu.": "പ്രീട്രെയിനെഡ് ഫയലെ ഡ്രോപ്പ്ഡൌൺ മെനുവിലെത്താൻ റെഫ്രഷ് ബട്ടൺ ക്ലിക്കുചെയ്യുക.", - "Pretrained G Path": "കസ്റ്റം പ്രീട്രെയിനെഡ് G പാത", - "Pretrained D Path": "കസ്റ്റം പ്രീട്രെയിനെഡ് D പാത", - "GPU Settings": "GPU സെറ്റിംഗുകൾ", - "Sets advanced GPU settings, recommended for users with better GPU architecture.": "അട്വാൻസ്ഡ് GPU സെറ്റിംഗുകൾ സജ്ജീവമാക്കുന്നു, പ്രശസ്ത GPU ആർക്കിടെയുള്ള ഉപയോക്താക്കളിനായി ശിഫാരസ് ചെയ്തത്.", - "GPU Custom Settings": "GPU കസ്റ്റം സെറ്റിംഗുകൾ", - "GPU Number": "GPU നമ്പർ", - "0 to ∞ separated by -": "0 മുതൽ ∞ വരെ - ഒടുക്കിയ", - "GPU Information": "GPU വിവരം", - "Pitch Guidance": "പിച്ച് ഗൈഡൻസ്", - "By employing pitch guidance, it becomes feasible to mirror the intonation of the original voice, including its pitch. This feature is particularly valuable for singing and other scenarios where preserving the original melody or pitch pattern is essential.": "പിച്ച് ഗൈഡൻസ് ഉപയോഗിച്ച്, ഓരിജിനൽ വോയ്സിന്റെ ഇൻറോണേഷനെ, അതിന്റെ പിച്ചു സേവനേന്ന്, സോണ്റിംഗ് മുതലായ സന്നിധികളിൽ പ്രാഥമിക സ്വരം അല്ലെങ്കിൽ പിച്ച് നമ്പറെ പരിരക്ഷിക്കേണ്ടതായ സ്ഥിതികളിലേക്ക് മികച്ച ഫലങ്ങൾ പ്രാപ്തമാക്കാനാകുന്നു.", - "Utilize pretrained models when training your own. This approach reduces training duration and enhances overall quality.": "നിനക്ക് സ്വന്തമായി പ്രശിക്ഷണം നടത്തുമ്പോൾ പ്രീട്രെയിനെഡ് മോഡലുകൾ ഉപയോഗിക്കുക. 
ഈ രീതി പ്രശിക്ഷണ സമയം കുറയ്ക്കുന്നുവെങ്കിൽ മൊത്തം ഗുണമേന്മ വരും.", - "Extract Features": "ഫീച്ചർ എക്സ്ട്രാക്ട്", - "Overtraining Detector": "ഓവർട്രെയിനിംഗ് ഡിറ്റക്റ്റർ", - "Detect overtraining to prevent the model from learning the training data too well and losing the ability to generalize to new data.": "പ്രശിക്ഷണ ഡാറ്റയെ നിങ്ങളുടെ മോഡൽ അതിന്റെ തരംതിരിച്ചു പഠിക്കാൻ അനുവദിക്കുന്നത് നിലവിൽ നിന്ന് ഓവർട്രെയിനിംഗ് ശ്രമിക്കുക.", - "Overtraining Detector Settings": "ഓവർട്രെയിനിംഗ് ഡിറ്റക്റ്റർ സെറ്റിംഗുകൾ", - "Overtraining Threshold": "ഓവർട്രെയിനിംഗ് താഴ്ന്ന മിതം", - "Set the maximum number of epochs you want your model to stop training if no improvement is detected.": "യാത്രാവധി പരിശോധിച്ചിട്ടില്ലാത്ത അഭിവൃദ്ധി നിരയെടുക്കുകയാണെങ്കിൽ നിങ്ങളുടെ മോഡൽ പരിശോധനയെന്നത് പ്രശിക്ഷണം നിലനിൽക്കാനുള്ള ഏറ്റവും ഉന്നത എപ്പോക്കുകൾ സജ്ജമാക്കുക.", - - "Start Training": "പ്രശിക്ഷണം ആരംഭിക്കുക", - "Stop Training & Restart Applio": "പ്രശിക്ഷണം നിർത്തുക & അപ്ലിയോ പുനഃപ്രാരംഭിക്കുക", - "Generate Index": "ഇൻഡെക്സ് സൃഷ്ടിക്കുക", - - "Export Model": "എക്സ്പോർട്ട് മോഡൽ", - "The button 'Upload' is only for google colab: Uploads the exported files to the ApplioExported folder in your Google Drive.": "'അപ്ലോഡ്' ബട്ടൺ കേവലം ഗൂഗിൾ കോളാബിന് മാത്രം: നിങ്ങളുടെ ഗൂഗിൾ ഡ്രൈവിലെ ApplioExported ഫോൾഡറിലേക്ക് എക്സ്പോർട്ട് ചെയ്യുന്നു.", - "Exported Pth file": "എക്സ്പോർട്ട് ചെയ്ത Pth ഫയൽ", - "Exported Index file": "എക്സ്പോർട്ട് ചെയ്ത ഇൻഡെക്സ് ഫയൽ", - "Select the pth file to be exported": "എക്സ്പോർട്ട് ചെയ്യാൻ ആദ്യം pth ഫയൽ തിരഞ്ഞെടുക്കുക", - "Select the index file to be exported": "എക്സ്പോർട്ട് ചെയ്യാൻ ആദ്യം ഇൻഡെക്സ് ഫയൽ തിരഞ്ഞെടുക്കുക", - "Upload": "അപ്ലോഡ്", - - "Voice Model": "വോയ്സ് മോഡൽ", - "Select the voice model to use for the conversion.": "കണ്വേർഷനിനായി ഉപയോഗിക്കാൻ വോയ്സ് മോഡലുകൾ തിരഞ്ഞെടുക്കുക.", - "Index File": "ഇൻഡെക്സ് ഫയൽ", - "Select the index file to use for the conversion.": "കണ്വേർഷനിനായി ഉപയോഗിക്കേണ്ട ഇൻഡെക്സ് ഫയലുകൾ തിരഞ്ഞെടുക്കുക.", - "Refresh": "പുനഃസ്വന്തമാക്കുക", - "Unload Voice": "വോയ്സ് 
അൺലോഡ്", - "Single": "ഏകത്വം", - "Upload Audio": "ഓഡിയോ അപ്‌ലോഡ് ചെയ്യുക", - "Select Audio": "ഓഡിയോ തിരഞ്ഞെടുക്കുക", - "Select the audio to convert.": "കണ്വേർട്ട് ചെയ്യാൻ ഓഡിയോ തിരഞ്ഞെടുക്കുക.", - "Advanced Settings": "പുതുമയായ സെറ്റിംഗുകൾ", - "Clear Outputs (Deletes all audios in assets/audios)": "പരിമാറ്റുക (assets/audios എല്ലാ ഓഡിയോകൾ ഇല്ലാതാക്കുക)", - "Custom Output Path": "കസ്റ്റം ഔട്ട്പുട്ട് പാത", - "Output Path": "ഔട്ട്പുട്ട് പാത", - "The path where the output audio will be saved, by default in assets/audios/output.wav": "ഓട്ട്പുട്ട് ഓഡിയോ സേവ്‌ചെയ്യപ്പെടുന്നത്, സ്വഭാവമായി assets/audios/output.wav ഇല്‍", - "Split Audio": "ഓഡിയോ വിഭാഗീകരണം", - "Split the audio into chunks for inference to obtain better results in some cases.": "അനുമാനത്തിന് കൂടുതൽ ഫലങ്ങൾ ലഭിക്കാൻ ഓഡിയോ ഭാഗങ്ങൾക്ക് വിഭാഗീകരണം ചെയ്യുക.", - "Autotune": "ഓട്ടോട്യൂൺ", - "Apply a soft autotune to your inferences, recommended for singing conversions.": "സോഫ്റ്റ് ഓട്ടോട്യൂൺ ആപ്ലയുകയും, സിംഗിങ് കൺവേർഷനുകളില്‍ ശിഫാരസ് ചെയ്യുകയും.", - "Clean Audio": "ശുദ്ധമായ ഓഡിയോ", - "Clean your audio output using noise detection algorithms, recommended for speaking audios.": "ശുദ്ധമായി നോയിസ് ഡിറ്റക്‌ഷൻ ആൾഗോരിതങ്ങൾ ഉപയോഗിച്ച് നിനക്ക് എത്ര പ്രയോജനപ്രദമായ ഓഡിയോ പരിഷ്കരിക്കാൻ, സ്പീക്കിംഗ് ഓഡിയോക്കിന് ശിഫാരസ് ചെയ്യുക.", - "Clean Strength": "ശുദ്ധി ശക്തി", - "Set the clean-up level to the audio you want, the more you increase it the more it will clean up, but it is possible that the audio will be more compressed.": "നിനക്ക് അവശ്യമായ ഓഡിയോക്ക് ശുദ്ധിയുടെ നില സജ്ജീവമാക്കുക, അതെക്കും കൂടുതൽ ഉള്ളതും അതിനെക്കുറിച്ച് ചോദിക്കുന്നതെന്തെങ്കിലും ശുദ്ധി ചെയ്തിരിക്കുന്ന ഓഡിയോ കമ്പ്രസ്‌ഡ് ആയിരിക്കാനുള്ള സാധ്യതയുണ്ട്.", - "Pitch": "പിച്ച്", - "Set the pitch of the audio, the higher the value, the higher the pitch.": "ഓഡിയോയുടെ പിച്ച് സജ്ജീവമാക്കുക, അതെക്കും ഉയരുന്നുവെങ്കിലും പിച്ച്.", - "Filter Radius": "ഫിൽട്ടർ റേഡിയസ്", - "If the number is greater than or equal to three, employing median filtering on the collected tone results has 
the potential to decrease respiration.": "സംഖ്യ 3 അല്ലെങ്കിൽ അതിനേക്കാൾ കൂടുതൽ ആയിരിക്കുന്നാല്‍, ശ്വസനം കുറയ്ക്കാന്‍ ശേഷിക്കുന്ന രീതിയില്‍ കൂടുതല്‍ കഴിവുള്ളതാണ്.", - "Search Feature Ratio": "സേര്‍ച്ച് ഫീച്ചർ റേഷ്യോ", - "Influence exerted by the index file; a higher value corresponds to greater influence. However, opting for lower values can help mitigate artifacts present in the audio.": "ഇനഡെക്സ് ഫയലായി വികസിക്കുന്ന പ്രഭാവം; ഉയര്‍ന്ന മൂല്യം ഉയരത്തിന് അനുബന്ധ പ്രഭാവമുള്ളതാണ്. എനിക്ക് കുറഞ്ഞ മൂല്യങ്ങളെ അനുവദിക്കാനും ആര്‍ടിഫാക്ടുകള്‍ നിലവിലുള്ള ഓഡിയോയിലെ ബുദ്ധിമുട്ടുകൾ ഉപയോഗപ്പെടുന്നു.", - "Volume Envelope": "വോള്യൂം എൻവലപ്പ്", - "Substitute or blend with the volume envelope of the output. The closer the ratio is to 1, the more the output envelope is employed.": "ആഉട്ട്പുട്ട് ഒറ്റവന്നേറ്റത്തിന്റെ വോള്യൂം എൻവലപ്പ് സ്ഥലപ്പെടുത്തുക. അനുഭവം 1-ക്കു സമീപമായിരിക്കുന്നതും, അനുഭവ എൻവലപ്പ് ഉപയോഗപ്പെടുന്നതും കൂടുതൽ ആണ്.", - "Protect Voiceless Consonants": "വോയ്സ്‌ലസ് കോൺസനന്റുകൾ സംരക്ഷിക്കുക", - "Safeguard distinct consonants and breathing sounds to prevent electro-acoustic tearing and other artifacts. Pulling the parameter to its maximum value of 0.5 offers comprehensive protection. However, reducing this value might decrease the extent of protection while potentially mitigating the indexing effect.": "എല്ലാവര്‍ക്കും പ്രകടമായ കോൺസനന്റുകൾ ഒഴുകുന്നത് എന്നതുകൊണ്ടുതന്നെ ഇലക്ട്രോ-ഓക്കുസ്റ്റിക് കൊതിയും മറ്റു ആർട്ടിഫാക്ടുകളും പ്രതിരക്ഷിക്കുന്നതിനുള്ള അരികോട്. പാരാമീറ്ററിനെ അതിന്റെ 0.5 എന്നേക്കും ഉച്ചക്കൊണ്ട് കൂട്ടിക്കൊള്ളൽ സാമൂഹ്യപ്രതിരക്ഷ നൽകുന്നു. എന്നാല്‍, ഈ മൂല്യം കുറഞ്ഞാക്കാൻ സാധ്യതയുണ്ട്, പ്രതിരക്ഷണം താഴെ കുറഞ്ഞുകൂടാൻ സഹായകരമാവുക.", - "Pitch extraction algorithm": "പിച്ച് എക്സ്ട്രാക്ഷൻ ആൾഗോരിതം", - "Pitch extraction algorithm to use for the audio conversion. The default algorithm is rmvpe, which is recommended for most cases.": "ഓഡിയോ കൺവേർഷനിനായി ഉപയോഗിക്കേണ്ട പിച്ച് എക്സ്ട്രാക്ഷൻ ആൾഗോരിതം. 
സ്വതന്ത്ര ആൾഗോരിതത്താണ് rmvpe, അത് പലതരത്തിലുള്ള പ്രസ്താവനകളില്‍ ശിഫാരസ് ചെയ്യപ്പെടുന്നു.", - - "Convert": "കൺവേർട്ട് ചെയ്യുക", - "Export Audio": "ഓഡിയോ എക്സ്പോർട്ട് ചെയ്യുക", - - "Batch": "ബാച്ച്", - "Input Folder": "ഇൻപുട്ട് ഫോൾഡർ", - "Select the folder containing the audios to convert.": "കൺവേർട്ട് ചെയ്യാൻ ഓഡിയോകളെ കാണുന്ന ഫോൾഡർ തിരഞ്ഞെടുക്കുക.", - "Enter input path": "ഇൻപുട്ട് പാത നൽകുക", - "Output Folder": "ഔട്ട്പുട്ട് ഫോൾഡർ", - "Select the folder where the output audios will be saved.": "ഔട്ട്പുട്ട് ഓഡിയോകൾ സേവ്‌ചെയ്യപ്പെടുന്ന ഫോൾഡർ തിരഞ്ഞെടുക്കുക.", - "Enter output path": "ഔട്ട്പുട്ട് പാത നൽകുക", - - "Get information about the audio": "ഓഡിയോയുടെ കുറിപ്പ് നേടുക", - - "## Voice Blender": "## വോയ്സ് ബ്ലെന്ഡർ", - "Select two voice models, set your desired blend percentage, and blend them into an entirely new voice.": "രണ്ട് വോയ്സ് മോഡലുകൾ തിരഞ്ഞെടുക്കുക, നിനക്ക് ആഗ്രഹിക്കുന്ന ബ്ലെന്റ് ശതകം സജ്ജീവമാക്കുക, അവയുടെ ബ്ലെന്റും പൂർണ്ണമായും പുതിയ ഒരു വോയ്സായാക്കുക.", - "Voice Blender": "വോയ്സ് ബ്ലെന്ഡർ", - "Drag and drop your model here": "നിനക്ക് ശൈലിക്കുകയോരോ മോഡൽ ഇവിടെ വികസിപ്പിക്കുക", - "You can also use a custom path.": "നിനക്ക് ഒരു സ്വന്തമായ പാതയും ഉപയോഗിക്കാം.", - "Blend Ratio": "ബ്ലെന്റ് അനുപാതം", - "Adjusting the position more towards one side or the other will make the model more similar to the first or second.": "സ്ഥാനം കൊണ്ടുകൂടുതൽ പ്രതിരൂപമാക്കാൻ മുന്നേറ്റം ഒന്നിലേറ്റെത്തിനു അല്ലെങ്കിൽ മറ്റൊന്നിലേറ്റെത്തിനു സാധിക്കും.", - "Fusion": "ഫ്യൂഷൻ", - "Path to Model": "മോഡലിന്റെ പാത", - "Enter path to model": "മോഡലിനെ സജ്ജീവമാക്കാൻ പാത നൽകുക", - "Model information to be placed": "പ്ലേസ്മെന്റ് ചെയ്യാൻ ആവശ്യമായ മോഡലിന്റെ വിവരം", - "Introduce the model information": "മോഡലിന്റെ വിവരം പരിചയപ്പെടുക", - "The information to be placed in the model (You can leave it blank or put anything).": "മോഡലില്‍ സ്ഥലപ്പെടുത്താനുള്ള വിവരം (നിനക്ക് വിടാവുകയും അല്ലെങ്കിൽ എന്തെങ്കിലും ചേരുകയും ചെയ്യാം).", - "View model information": "മോഡലിന്റെ വിവരം കാണുക", - "Introduce the model pth 
path": "മോഡലിന്റെ pth പാത പരിചയപ്പെടുക", - "View": "കാണുക", - "Model extraction": "മോഡൽ എക്സ്ട്രാക്ഷൻ", - "Model conversion": "മോഡൽ കൺവേർഷൻ", - "Pth file": "Pth ഫയൽ", - "Output of the pth file": "Pth ഫയലിന്റെ പ്രോഡക്റ്റ്", - "# How to Report an Issue on GitHub": "# GitHub-ലെ ഒരു ഇഷ്യൂ റിപ്പോർട്ട് ചെയ്യുന്നതിനുള്ള രീതി", - "1. Click on the 'Record Screen' button below to start recording the issue you are experiencing.": "1. നിന്റെ അനുഭവപ്പെടുന്ന ഇഷ്യൂ റെക്കോർഡുചെയ്യുന്നതിന് താഴെ 'Record Screen' ബട്ടൺ ക്ലിക്കുചെയ്യുക.", - "2. Once you have finished recording the issue, click on the 'Stop Recording' button (the same button, but the label changes depending on whether you are actively recording or not).": "2. ഇഷ്യൂ റെക്കോർഡുചെയ്തുതീർക്കുന്നതിനുശേഷം, 'Stop Recording' ബട്ടൺ ക്ലിക്കുചെയ്യുക (അത് തുടർന്നിരിക്കുന്നുമോ എന്ന് താഴെയോ കാണുന്ന ലേബല്‍ അനുസരിച്ച് മാറുന്നു).", - "3. Go to [GitHub Issues](https://github.com/IAHispano/Applio/issues) and click on the 'New Issue' button.": "3. [GitHub Issues](https://github.com/IAHispano/Applio/issues) സ്ഥലത്തേക്ക് പോകുക, 'New Issue' ബട്ടൺ ക്ലിക്കുചെയ്യുക.", - "4. Complete the provided issue template, ensuring to include details as needed, and utilize the assets section to upload the recorded file from the previous step.": "4. 
നൽകിയ ഇഷ്യൂ ടെംപ്ലേറ്റ് പൂർത്തിയാക്കുക, ആവശ്യമായ വിവരങ്ങളെ ചേർക്കുന്നതിനുശേഷം, പഴയ ഘടനയിൽ റെക്കോർഡുചെയ്ത ഫയൽ അപ്‌ലോഡ് ചെയ്യുന്നതിന് എസെറ്റ് വിഭാഗം ഉപയോഗിക്കുക.", - "Record Screen": "റെക്കോർഡ് സ്‌ക്രീൻ", - "Record": "റെക്കോർഡ്", - "Stop Recording": "റെക്കോർഡുനിർത്തുക", - "Introduce the model .pth path": "മോഡൽ .pth പാത പരിചയപ്പെടുക", - "See Model Information": "മോഡൽ വിവരങ്ങൾ കാണുക", - "## Download Model": "## മോഡൽ ഡൗൺലോഡ്", - "Model Link": "മോഡൽ ലിങ്ക്", - "Introduce the model link": "മോഡൽ ലിങ്ക് പരിചയപ്പെടുക", - "Download Model": "മോഡൽ ഡൗൺലോഡ്", - "## Drop files": "## ഫയലുകൾ ഇടുക", - "## Search Model": "## മോഡൽ തിരയൽ", - "Search": "തിരയൽ", - "Introduce the model name to search.": "തിരയുന്നതിനായി മോഡൽ പേര് അറിയിക്കുക.", - "We couldn't find models by that name.": "അനുബന്ധമായ പേരിൽ മോഡൽസ് കണ്ടെത്താനായില്ല.", - - "Drag your .pth file and .index file into this space. Drag one and then the other.": "നിനക്ക് .pth ഫയലുകളും .index ഫയലുകളും ഇവിടെ ഡ്രാഗ് ചെയ്യുക. ഒന്നുകിട്ട് പിന്നെ മറ്റൊന്നു ഡ്രാഗ് ചെയ്യുക.", - "TTS Voices": "TTS വോയ്സുകൾ", - "Select the TTS voice to use for the conversion.": "മാറ്റത്തിനായി ഉപയോഗിക്കാൻ TTS വോയ്സ് തിരഞ്ഞെടുക്കുക.", - "Text to Synthesize": "സിന്തിയസൈസ് ചെയ്യുന്ന ടെക്സ്റ്റ്", - "Enter the text to synthesize.": "സിന്തിയസൈസ് ചെയ്യാൻ ടെക്സ്റ്റ് നൽകുക.", - "Or you can upload a .txt file": "അല്ലെങ്കിൽ .txt ഫയൽ അപ്‌ലോഡ് ചെയ്യാം", - "Enter text to synthesize": "സിന്തിയസൈസ് ചെയ്യാൻ ടെക്സ്റ്റ് നൽകുക", - "Output Path for TTS Audio": "TTS ഓഡിയോക്ക് ഔട്ട്പുട്ട് പാത", - "Output Path for RVC Audio": "RVC ഓഡിയോക്ക് ഔട്ട്പുട്ട് പാത", - "Enable Applio integration with Discord presence": "Discord പ്രസന്നതയോട് Applio ഇൻറഗ്രേഷൻ സജീവമാക്കുക", - "It will activate the possibility of displaying the current Applio activity in Discord.": "ഇത് Discord-നായിരിക്കുന്ന നിലാവ് കാണാനുള്ള സാധ്യത സജീവമാക്കും.", - "Enable Applio integration with applio.org/models using flask": "flask ഉപയോഗിച്ച് applio.org/models ഇൻറഗ്രേഷൻ Applio സജീവമാക്കുക", - "It will activate the possibility of 
downloading models with a click from the website.": "ഇത് വെബ്സൈറ്റിൽ ഒരു ക്ലിക്ക് ചെയ്യുമ്പോൾ മോഡലുകൾ ഡൗൺലോഡ് ചെയ്യാനുള്ള സാധ്യത സജീവമാക്കും.", - "Enable fake GPU": "വഞ്ചി ജിപിയു ഇയക്കുക", - "Training is currently unsupported due to the absence of a GPU. To activate the training tab, navigate to the settings tab and enable the 'Fake GPU' option.": "പ്രശിക്ഷണം തറന്ന് നിലവിലുള്ളതിന് ജിപിയു ഇല്ലാതെ പ്രസ്താവിക്കുന്നതിനായി തിരഞ്ഞെടുക്കുന്നത് അനുവദിക്കാൻ 'ഫെയ്ക് ജിപിയു' ഓപ്ഷൻ സജ്ജമാക്കുക എന്ന് ക്രമീകരിക്കാൻ തിരിച്ചുവരുക. ", - "Activates the train tab. However, please note that this device lacks GPU capabilities, hence training is not supported. This option is only for testing purposes. (This option will restart Applio)": "പ്രശിക്ഷണം തുടങ്ങുന്നു. എങ്കിലും, ദയവായി ശ്രദ്ധിക്കുക എന്നത് നിങ്ങളുടെ ഉപകരണത്തിൽ GPU സാധ്യതകൾ ഇല്ലാത്തതാണ്, അതിനാൽ പ്രശിക്ഷണം അനുവദനീയമല്ല. ഈ ഓപ്ഷൻ ഇപ്പോൾ പരീക്ഷണങ്ങളിക്കായാണ്. (ഈ ഓപ്ഷൻ അപ്ലിയോ പുനഃസജ്ജമാക്കും)", - "Theme": "തീം", - "Select the theme you want to use. (Requires restarting Applio)": "നിനക്ക് ഉപയോഗിക്കാൻ എന്താണെന്ന് നിങ്ങളുടെ തീം തിരഞ്ഞെടുക്കുക. (Applio പുനഃസജീവമാക്കാൻ ആവശ്യമാണ്)", - "Language": "ഭാഷ", - "Select the language you want to use. (Requires restarting Applio)": "നിങ്ങളുടെ ഉപയോഗത്തിന് നിങ്ങളുടെ ഭാഷ തിരഞ്ഞെടുക്കുക. 
(Applio പുനഃസജീവമാക്കാൻ ആവശ്യമാണ്)", - "Plugin Installer": "പ്ലഗിൻ ഇൻസ്റ്റാൾലർ", - "Drag your plugin.zip to install it": "അതിനായി നിനക്ക് നിന്നെത്തിയ .zip ഫയല്‍ ഇൻസ്റ്റാൾ ചെയ്യാൻ ഇവിടെ ഡ്രാഗ് ചെയ്യുക", - "Version Checker": "വേര്‍ഷന്‍ ചെക്കർ", - "Check which version of Applio is the latest to see if you need to update.": "നിനക്ക് അപ്‌ഡേറ്റുചെയ്യേണ്ടതോ എന്ന് അപ്‌ലിയോയുടെ ഏറ്റവും പുതിയ പതിപ്പായത് പരിശോധിക്കുക.", - "Check for updates": "അപ്‌ഡേറ്റുകൾ പരിശോധിക്കുക" -} +{ + "VITS-based Voice Conversion focused on simplicity, quality and performance.": "അനന്തമായ ശക്തി, മോഡുലാരിറ്റി, ഉപയോക്തൃ-സൗഹൃദ അനുഭവത്തിനായി യാഥാർത്ഥ്യമാക്കിയ അത്യുന്നതമായ വോയ്സ് ക്ലോണിങ് ടൂൾ.\n[മലയാളത്തിലേക്ക് വായന: Enes](https://discord.com/users/1140031358006202468)", + "This section contains some extra utilities that often may be in experimental phases.": "ഈ പേരിലെ കൊണ്ടാടാൻ പ്രയോജനപ്രദമായ എന്നതിനാൽ കഴിഞ്ഞത് സാധാരണയായ പോസിക്കുകളിൽ അവസാനിക്കാത്ത ചില യന്ത്രങ്ങൾ ഉള്ളതാണ്.", + "Output Information": "പ്രണാമം വിവരം", + "The output information will be displayed here.": "ഇവിടെ പ്രണയ വിവരങ്ങൾ പ്രദശിപ്പിക്കപ്പെടും.", + "Inference": "സൂചന", + "Train": "പ്രശിക്ഷണം", + "Extra": "അധികം", + "Merge Audios": "ഓഡിയോ ഒടിക്കുക", + "Processing": "പ്രൊസസ്സിംഗ്", + "Audio Analyzer": "ഓഡിയോ വിശ്ലേഷണകൾ", + "Model Information": "മോഡൽ വിവരങ്ങൾ", + "Plugins": "പ്ലഗിൻസ്", + "Download": "ഡൗൺലോഡ്", + "Report a Bug": "പിശക് റിപ്പോർട്ട്", + "Settings": "സെറ്റിംഗുകൾ", + "Preprocess": "പ്രൊസസ്", + "Model Name": "മോഡൽ പേര്", + "Name of the new model.": "പുതിയ മോഡലിന്റെ പേര്.", + "Enter model name": "മോഡൽ പേര് നൽകുക", + "Dataset Path": "ഡാറ്റാസെറ്റ് പാത", + "Path to the dataset folder.": "ഡാറ്റാസെറ്റ് ഫോൾഡർക്കുള്ള പാത.", + "Refresh Datasets": "ഡാറ്റാസെറ്റുകൾ പുനഃസൃഷ്ടിക്കുക", + "Dataset Creator": "ഡാറ്റാസെറ്റ് സൃഷ്ടാവ്", + "Dataset Name": "ഡാറ്റാസെറ്റ് പേര്", + "Name of the new dataset.": "പുതിയ ഡാറ്റാസെറ്റിന്റെ പേര്.", + "Enter dataset name": "ഡാറ്റാസെറ്റ് പേര് നൽകുക", + "Upload Audio Dataset": "ഓഡിയോ ഡാറ്റാസെറ്റ് അപ്‌ലോഡ് ചെയ്യുക", + 
"The audio file has been successfully added to the dataset. Please click the preprocess button.": "ഓഡിയോ ഫയൽ യഥാർത്ഥമായി ഡാറ്റാസെറ്റിലേക്ക് ചേർന്നു. ദയവായി പ്രൊസെസ് ബട്ടൺ അമർത്തുക.", + "Enter dataset path": "ഡാറ്റാസെറ്റ് പാത നൽകുക", + "Sampling Rate": "സാമ്പ്ലിംഗ് റേറ്റ്", + "The sampling rate of the audio files.": "ഓഡിയോ ഫയലുകളുടെ സാമ്പ്ലിംഗ് റേറ്റ്.", + "Model Architecture": "RVC പതിപ്പ്", + "Version of the model architecture.": "മോഡലിന്റെ RVC പതിപ്പ്.", + "Preprocess Dataset": "ഡാറ്റാസെറ്റ് പ്രൊസെസ് ചെയ്യുക", + + "Embedder Model": "Embedder Model", + "Model used for learning speaker embedding.": "സ്പീക്കർ എംബെഡ്ഡിംഗ് പഠിപ്പിക്കാൻ ഉപയോഗിക്കുന്ന മോഡൽ.", + "Extract": "എക്സ്ട്രാക്ട്", + "Hop Length": "ഹോപ്പ് ലെന്ത്", + "Denotes the duration it takes for the system to transition to a significant pitch change. Smaller hop lengths require more time for inference but tend to yield higher pitch accuracy.": "സിസ്റ്റത്തിൽ ഒരു വിശദ പിച്ച് മാറ്റത്തിന്റെ ദാരിദ്ര്യം സൂചിപ്പിക്കുന്നു. ചെറിയ ഹോപ്പ് ലെന്തുകള് ഇൻഫരൻസിനായി കൂടുതൽ സമയം ആവശ്യപ്പെടുന്നു, എന്നിരുന്നാലും ഉയരമായ പിച്ച് അക്ക്യൂറസി നൽകുന്നു.", + "Batch Size": "ബാച്ച് വലുപ്പം", + "It's advisable to align it with the available VRAM of your GPU. A setting of 4 offers improved accuracy but slower processing, while 8 provides faster and standard results.": "നിനക്ക് ലഭ്യമായ GPU-യുടെ VRAM നുസരിച്ച് അലൈൻ ചെയ്യുന്നത് പ്രാധാന്യപ്പെടുന്നു. 
4-ന്റെ സെറ്റിംഗ് മൊത്തം അക്ക്യൂറസി പരിഷ്കർത്തനവും എല്ലാത്തിനെയും അവലംബപ്പെടുന്നു, എന്നിരുന്നാലും 8 വലുപ്പം അനുഭവജീവനത്തിനായി ഉന്നയിക്കുന്നു.", + "Save Every Epoch": "എന്നാൽ എന്റെ എപ്പൊക്കാലിലെയും മോഡൽ സേവ് ചെയ്യുന്നതിനു വരെ", + "Determine at how many epochs the model will be saved at.": "എന്താണ് എപ്പൊക്കാലിൽ മോഡൽ സേവ് ചെയ്യപ്പെടുന്നതെന്ന് തിരഞ്ഞെടുക്കുക.", + "Total Epoch": "മൊത്തം എപ്പൊക്ക്", + "Specifies the overall quantity of epochs for the model training process.": "മോഡൽ പ്രശിക്ഷണ പ്രക്രിയയ്ക്കായി എപ്പൊക്ക് എത്രയാണ് എന്നത് നിര്ദിഷ്ടമാക്കുന്നു.", + "Pretrained": "പ്രീട്രെയിനെഡ്", + "Save Only Latest": "കേവലായി പുതിയത് മാത്രം സേവ് ചെയ്യുക", + "Enabling this setting will result in the G and D files saving only their most recent versions, effectively conserving storage space.": "ഈ സെറ്റിംഗ് പ്രവർത്തനത്തിനായി ക്രമീകരിച്ചാൽ, G മറ്റും D ഫയലുകൾ അവരുടെ അവസാന പതിപ്പുകൾക്ക് മാത്രം സേവ് ചെയ്യും, പ്രഭാവകരമായി സ്റ്റോറേജ് സ്ഥലം സംരക്ഷിക്കുന്നതാണ്.", + "Save Every Weights": "എന്റെ എപ്പൊക്കാലിലും ഭാരം സേവ് ചെയ്യുക", + "This setting enables you to save the weights of the model at the conclusion of each epoch.": "ഈ സെറ്റിംഗ് നിര്ദ്ദേശപ്പെടുന്ന ഓരോ എപ്പൊക്കിലും മോഡലിന്റെ ഭാരങ്ങൾ സേവ് ചെയ്യാൻ കഴിയുന്നു.", + "Custom Pretrained": "കസ്റ്റം പ്രീട്രെയിനെഡ്", + "Utilizing custom pretrained models can lead to superior results, as selecting the most suitable pretrained models tailored to the specific use case can significantly enhance performance.": "കസ്റ്റം പ്രീട്രെയിനെഡ് മോഡലുകൾ ഉപയോഗിക്കുന്നത് ഉന്നത ഫലങ്ങൾ നൽകിയാൽ, സ്പഷ്ടമായ ഉപയോഗ കേസിനായി കൈമേലെ പ്രീട്രെയിനെഡ് മോഡലുകൾ തിരഞ്ഞെടുക്കുന്നത് സാധാരണയായ പ്രദർശനത്തെ വളർത്തുന്നതിനും പ്രദർശനം വളർത്തുന്നതിനും കൂടുതൽ സഹായകമാകും.", + "Upload Pretrained Model": "പ്രീട്രെയിനെഡ് മോഡൽ അപ്ലോഡ് ചെയ്യുക", + "Refresh Custom Pretraineds": "കസ്റ്റം പ്രീട്രെയിനെഡുകൾ പുനഃസ്വന്തമാക്കുക", + "Pretrained Custom Settings": "പ്രീട്രെയിനെഡ് കസ്റ്റം സെറ്റിംഗുകൾ", + "The file you dropped is not a valid pretrained file. 
Please try again.": "നിനക്ക് ഡ്രോപ്പ് ചെയ്യിയ ഫയൽ ഒരു സാധാരണ പ്രീട്രെയിനെഡ് ഫയലല്ല. ദയവായി വീണ്ടും ശ്രയിക്കുക.", + "Click the refresh button to see the pretrained file in the dropdown menu.": "പ്രീട്രെയിനെഡ് ഫയലെ ഡ്രോപ്പ്ഡൌൺ മെനുവിലെത്താൻ റെഫ്രഷ് ബട്ടൺ ക്ലിക്കുചെയ്യുക.", + "Pretrained G Path": "കസ്റ്റം പ്രീട്രെയിനെഡ് G പാത", + "Pretrained D Path": "കസ്റ്റം പ്രീട്രെയിനെഡ് D പാത", + "GPU Settings": "GPU സെറ്റിംഗുകൾ", + "Sets advanced GPU settings, recommended for users with better GPU architecture.": "അട്വാൻസ്ഡ് GPU സെറ്റിംഗുകൾ സജ്ജീവമാക്കുന്നു, പ്രശസ്ത GPU ആർക്കിടെയുള്ള ഉപയോക്താക്കളിനായി ശിഫാരസ് ചെയ്തത്.", + "GPU Custom Settings": "GPU കസ്റ്റം സെറ്റിംഗുകൾ", + "GPU Number": "GPU നമ്പർ", + "0 to ∞ separated by -": "0 മുതൽ ∞ വരെ - ഒടുക്കിയ", + "GPU Information": "GPU വിവരം", + "Pitch Guidance": "പിച്ച് ഗൈഡൻസ്", + "By employing pitch guidance, it becomes feasible to mirror the intonation of the original voice, including its pitch. This feature is particularly valuable for singing and other scenarios where preserving the original melody or pitch pattern is essential.": "പിച്ച് ഗൈഡൻസ് ഉപയോഗിച്ച്, ഓരിജിനൽ വോയ്സിന്റെ ഇൻറോണേഷനെ, അതിന്റെ പിച്ചു സേവനേന്ന്, സോണ്റിംഗ് മുതലായ സന്നിധികളിൽ പ്രാഥമിക സ്വരം അല്ലെങ്കിൽ പിച്ച് നമ്പറെ പരിരക്ഷിക്കേണ്ടതായ സ്ഥിതികളിലേക്ക് മികച്ച ഫലങ്ങൾ പ്രാപ്തമാക്കാനാകുന്നു.", + "Utilize pretrained models when training your own. This approach reduces training duration and enhances overall quality.": "നിനക്ക് സ്വന്തമായി പ്രശിക്ഷണം നടത്തുമ്പോൾ പ്രീട്രെയിനെഡ് മോഡലുകൾ ഉപയോഗിക്കുക. 
ഈ രീതി പ്രശിക്ഷണ സമയം കുറയ്ക്കുന്നുവെങ്കിൽ മൊത്തം ഗുണമേന്മ വരും.", + "Extract Features": "ഫീച്ചർ എക്സ്ട്രാക്ട്", + "Overtraining Detector": "ഓവർട്രെയിനിംഗ് ഡിറ്റക്റ്റർ", + "Detect overtraining to prevent the model from learning the training data too well and losing the ability to generalize to new data.": "പ്രശിക്ഷണ ഡാറ്റയെ നിങ്ങളുടെ മോഡൽ അതിന്റെ തരംതിരിച്ചു പഠിക്കാൻ അനുവദിക്കുന്നത് നിലവിൽ നിന്ന് ഓവർട്രെയിനിംഗ് ശ്രമിക്കുക.", + "Overtraining Detector Settings": "ഓവർട്രെയിനിംഗ് ഡിറ്റക്റ്റർ സെറ്റിംഗുകൾ", + "Overtraining Threshold": "ഓവർട്രെയിനിംഗ് താഴ്ന്ന മിതം", + "Set the maximum number of epochs you want your model to stop training if no improvement is detected.": "യാത്രാവധി പരിശോധിച്ചിട്ടില്ലാത്ത അഭിവൃദ്ധി നിരയെടുക്കുകയാണെങ്കിൽ നിങ്ങളുടെ മോഡൽ പരിശോധനയെന്നത് പ്രശിക്ഷണം നിലനിൽക്കാനുള്ള ഏറ്റവും ഉന്നത എപ്പോക്കുകൾ സജ്ജമാക്കുക.", + + "Start Training": "പ്രശിക്ഷണം ആരംഭിക്കുക", + "Stop Training & Restart Applio": "പ്രശിക്ഷണം നിർത്തുക & അപ്ലിയോ പുനഃപ്രാരംഭിക്കുക", + "Generate Index": "ഇൻഡെക്സ് സൃഷ്ടിക്കുക", + + "Export Model": "എക്സ്പോർട്ട് മോഡൽ", + "The button 'Upload' is only for google colab: Uploads the exported files to the ApplioExported folder in your Google Drive.": "'അപ്ലോഡ്' ബട്ടൺ കേവലം ഗൂഗിൾ കോളാബിന് മാത്രം: നിങ്ങളുടെ ഗൂഗിൾ ഡ്രൈവിലെ ApplioExported ഫോൾഡറിലേക്ക് എക്സ്പോർട്ട് ചെയ്യുന്നു.", + "Exported Pth file": "എക്സ്പോർട്ട് ചെയ്ത Pth ഫയൽ", + "Exported Index file": "എക്സ്പോർട്ട് ചെയ്ത ഇൻഡെക്സ് ഫയൽ", + "Select the pth file to be exported": "എക്സ്പോർട്ട് ചെയ്യാൻ ആദ്യം pth ഫയൽ തിരഞ്ഞെടുക്കുക", + "Select the index file to be exported": "എക്സ്പോർട്ട് ചെയ്യാൻ ആദ്യം ഇൻഡെക്സ് ഫയൽ തിരഞ്ഞെടുക്കുക", + "Upload": "അപ്ലോഡ്", + + "Voice Model": "വോയ്സ് മോഡൽ", + "Select the voice model to use for the conversion.": "കണ്വേർഷനിനായി ഉപയോഗിക്കാൻ വോയ്സ് മോഡലുകൾ തിരഞ്ഞെടുക്കുക.", + "Index File": "ഇൻഡെക്സ് ഫയൽ", + "Select the index file to use for the conversion.": "കണ്വേർഷനിനായി ഉപയോഗിക്കേണ്ട ഇൻഡെക്സ് ഫയലുകൾ തിരഞ്ഞെടുക്കുക.", + "Refresh": "പുനഃസ്വന്തമാക്കുക", + "Unload Voice": "വോയ്സ് 
അൺലോഡ്", + "Single": "ഏകത്വം", + "Upload Audio": "ഓഡിയോ അപ്‌ലോഡ് ചെയ്യുക", + "Select Audio": "ഓഡിയോ തിരഞ്ഞെടുക്കുക", + "Select the audio to convert.": "കണ്വേർട്ട് ചെയ്യാൻ ഓഡിയോ തിരഞ്ഞെടുക്കുക.", + "Advanced Settings": "പുതുമയായ സെറ്റിംഗുകൾ", + "Clear Outputs (Deletes all audios in assets/audios)": "പരിമാറ്റുക (assets/audios എല്ലാ ഓഡിയോകൾ ഇല്ലാതാക്കുക)", + "Custom Output Path": "കസ്റ്റം ഔട്ട്പുട്ട് പാത", + "Output Path": "ഔട്ട്പുട്ട് പാത", + "The path where the output audio will be saved, by default in assets/audios/output.wav": "ഓട്ട്പുട്ട് ഓഡിയോ സേവ്‌ചെയ്യപ്പെടുന്നത്, സ്വഭാവമായി assets/audios/output.wav ഇല്‍", + "Split Audio": "ഓഡിയോ വിഭാഗീകരണം", + "Split the audio into chunks for inference to obtain better results in some cases.": "അനുമാനത്തിന് കൂടുതൽ ഫലങ്ങൾ ലഭിക്കാൻ ഓഡിയോ ഭാഗങ്ങൾക്ക് വിഭാഗീകരണം ചെയ്യുക.", + "Autotune": "ഓട്ടോട്യൂൺ", + "Apply a soft autotune to your inferences, recommended for singing conversions.": "സോഫ്റ്റ് ഓട്ടോട്യൂൺ ആപ്ലയുകയും, സിംഗിങ് കൺവേർഷനുകളില്‍ ശിഫാരസ് ചെയ്യുകയും.", + "Clean Audio": "ശുദ്ധമായ ഓഡിയോ", + "Clean your audio output using noise detection algorithms, recommended for speaking audios.": "ശുദ്ധമായി നോയിസ് ഡിറ്റക്‌ഷൻ ആൾഗോരിതങ്ങൾ ഉപയോഗിച്ച് നിനക്ക് എത്ര പ്രയോജനപ്രദമായ ഓഡിയോ പരിഷ്കരിക്കാൻ, സ്പീക്കിംഗ് ഓഡിയോക്കിന് ശിഫാരസ് ചെയ്യുക.", + "Clean Strength": "ശുദ്ധി ശക്തി", + "Set the clean-up level to the audio you want, the more you increase it the more it will clean up, but it is possible that the audio will be more compressed.": "നിനക്ക് അവശ്യമായ ഓഡിയോക്ക് ശുദ്ധിയുടെ നില സജ്ജീവമാക്കുക, അതെക്കും കൂടുതൽ ഉള്ളതും അതിനെക്കുറിച്ച് ചോദിക്കുന്നതെന്തെങ്കിലും ശുദ്ധി ചെയ്തിരിക്കുന്ന ഓഡിയോ കമ്പ്രസ്‌ഡ് ആയിരിക്കാനുള്ള സാധ്യതയുണ്ട്.", + "Pitch": "പിച്ച്", + "Set the pitch of the audio, the higher the value, the higher the pitch.": "ഓഡിയോയുടെ പിച്ച് സജ്ജീവമാക്കുക, അതെക്കും ഉയരുന്നുവെങ്കിലും പിച്ച്.", + "Filter Radius": "ഫിൽട്ടർ റേഡിയസ്", + "If the number is greater than or equal to three, employing median filtering on the collected tone results has 
the potential to decrease respiration.": "സംഖ്യ 3 അല്ലെങ്കിൽ അതിനേക്കാൾ കൂടുതൽ ആയിരിക്കുന്നാല്‍, ശ്വസനം കുറയ്ക്കാന്‍ ശേഷിക്കുന്ന രീതിയില്‍ കൂടുതല്‍ കഴിവുള്ളതാണ്.", + "Search Feature Ratio": "സേര്‍ച്ച് ഫീച്ചർ റേഷ്യോ", + "Influence exerted by the index file; a higher value corresponds to greater influence. However, opting for lower values can help mitigate artifacts present in the audio.": "ഇനഡെക്സ് ഫയലായി വികസിക്കുന്ന പ്രഭാവം; ഉയര്‍ന്ന മൂല്യം ഉയരത്തിന് അനുബന്ധ പ്രഭാവമുള്ളതാണ്. എനിക്ക് കുറഞ്ഞ മൂല്യങ്ങളെ അനുവദിക്കാനും ആര്‍ടിഫാക്ടുകള്‍ നിലവിലുള്ള ഓഡിയോയിലെ ബുദ്ധിമുട്ടുകൾ ഉപയോഗപ്പെടുന്നു.", + "Volume Envelope": "വോള്യൂം എൻവലപ്പ്", + "Substitute or blend with the volume envelope of the output. The closer the ratio is to 1, the more the output envelope is employed.": "ആഉട്ട്പുട്ട് ഒറ്റവന്നേറ്റത്തിന്റെ വോള്യൂം എൻവലപ്പ് സ്ഥലപ്പെടുത്തുക. അനുഭവം 1-ക്കു സമീപമായിരിക്കുന്നതും, അനുഭവ എൻവലപ്പ് ഉപയോഗപ്പെടുന്നതും കൂടുതൽ ആണ്.", + "Protect Voiceless Consonants": "വോയ്സ്‌ലസ് കോൺസനന്റുകൾ സംരക്ഷിക്കുക", + "Safeguard distinct consonants and breathing sounds to prevent electro-acoustic tearing and other artifacts. Pulling the parameter to its maximum value of 0.5 offers comprehensive protection. However, reducing this value might decrease the extent of protection while potentially mitigating the indexing effect.": "എല്ലാവര്‍ക്കും പ്രകടമായ കോൺസനന്റുകൾ ഒഴുകുന്നത് എന്നതുകൊണ്ടുതന്നെ ഇലക്ട്രോ-ഓക്കുസ്റ്റിക് കൊതിയും മറ്റു ആർട്ടിഫാക്ടുകളും പ്രതിരക്ഷിക്കുന്നതിനുള്ള അരികോട്. പാരാമീറ്ററിനെ അതിന്റെ 0.5 എന്നേക്കും ഉച്ചക്കൊണ്ട് കൂട്ടിക്കൊള്ളൽ സാമൂഹ്യപ്രതിരക്ഷ നൽകുന്നു. എന്നാല്‍, ഈ മൂല്യം കുറഞ്ഞാക്കാൻ സാധ്യതയുണ്ട്, പ്രതിരക്ഷണം താഴെ കുറഞ്ഞുകൂടാൻ സഹായകരമാവുക.", + "Pitch extraction algorithm": "പിച്ച് എക്സ്ട്രാക്ഷൻ ആൾഗോരിതം", + "Pitch extraction algorithm to use for the audio conversion. The default algorithm is rmvpe, which is recommended for most cases.": "ഓഡിയോ കൺവേർഷനിനായി ഉപയോഗിക്കേണ്ട പിച്ച് എക്സ്ട്രാക്ഷൻ ആൾഗോരിതം. 
സ്വതന്ത്ര ആൾഗോരിതത്താണ് rmvpe, അത് പലതരത്തിലുള്ള പ്രസ്താവനകളില്‍ ശിഫാരസ് ചെയ്യപ്പെടുന്നു.", + + "Convert": "കൺവേർട്ട് ചെയ്യുക", + "Export Audio": "ഓഡിയോ എക്സ്പോർട്ട് ചെയ്യുക", + + "Batch": "ബാച്ച്", + "Input Folder": "ഇൻപുട്ട് ഫോൾഡർ", + "Select the folder containing the audios to convert.": "കൺവേർട്ട് ചെയ്യാൻ ഓഡിയോകളെ കാണുന്ന ഫോൾഡർ തിരഞ്ഞെടുക്കുക.", + "Enter input path": "ഇൻപുട്ട് പാത നൽകുക", + "Output Folder": "ഔട്ട്പുട്ട് ഫോൾഡർ", + "Select the folder where the output audios will be saved.": "ഔട്ട്പുട്ട് ഓഡിയോകൾ സേവ്‌ചെയ്യപ്പെടുന്ന ഫോൾഡർ തിരഞ്ഞെടുക്കുക.", + "Enter output path": "ഔട്ട്പുട്ട് പാത നൽകുക", + + "Get information about the audio": "ഓഡിയോയുടെ കുറിപ്പ് നേടുക", + + "## Voice Blender": "## വോയ്സ് ബ്ലെന്ഡർ", + "Select two voice models, set your desired blend percentage, and blend them into an entirely new voice.": "രണ്ട് വോയ്സ് മോഡലുകൾ തിരഞ്ഞെടുക്കുക, നിനക്ക് ആഗ്രഹിക്കുന്ന ബ്ലെന്റ് ശതകം സജ്ജീവമാക്കുക, അവയുടെ ബ്ലെന്റും പൂർണ്ണമായും പുതിയ ഒരു വോയ്സായാക്കുക.", + "Voice Blender": "വോയ്സ് ബ്ലെന്ഡർ", + "Drag and drop your model here": "നിനക്ക് ശൈലിക്കുകയോരോ മോഡൽ ഇവിടെ വികസിപ്പിക്കുക", + "You can also use a custom path.": "നിനക്ക് ഒരു സ്വന്തമായ പാതയും ഉപയോഗിക്കാം.", + "Blend Ratio": "ബ്ലെന്റ് അനുപാതം", + "Adjusting the position more towards one side or the other will make the model more similar to the first or second.": "സ്ഥാനം കൊണ്ടുകൂടുതൽ പ്രതിരൂപമാക്കാൻ മുന്നേറ്റം ഒന്നിലേറ്റെത്തിനു അല്ലെങ്കിൽ മറ്റൊന്നിലേറ്റെത്തിനു സാധിക്കും.", + "Fusion": "ഫ്യൂഷൻ", + "Path to Model": "മോഡലിന്റെ പാത", + "Enter path to model": "മോഡലിനെ സജ്ജീവമാക്കാൻ പാത നൽകുക", + "Model information to be placed": "പ്ലേസ്മെന്റ് ചെയ്യാൻ ആവശ്യമായ മോഡലിന്റെ വിവരം", + "Introduce the model information": "മോഡലിന്റെ വിവരം പരിചയപ്പെടുക", + "The information to be placed in the model (You can leave it blank or put anything).": "മോഡലില്‍ സ്ഥലപ്പെടുത്താനുള്ള വിവരം (നിനക്ക് വിടാവുകയും അല്ലെങ്കിൽ എന്തെങ്കിലും ചേരുകയും ചെയ്യാം).", + "View model information": "മോഡലിന്റെ വിവരം കാണുക", + "Introduce the model pth 
path": "മോഡലിന്റെ pth പാത പരിചയപ്പെടുക", + "View": "കാണുക", + "Model extraction": "മോഡൽ എക്സ്ട്രാക്ഷൻ", + "Model conversion": "മോഡൽ കൺവേർഷൻ", + "Pth file": "Pth ഫയൽ", + "Output of the pth file": "Pth ഫയലിന്റെ പ്രോഡക്റ്റ്", + "# How to Report an Issue on GitHub": "# GitHub-ലെ ഒരു ഇഷ്യൂ റിപ്പോർട്ട് ചെയ്യുന്നതിനുള്ള രീതി", + "1. Click on the 'Record Screen' button below to start recording the issue you are experiencing.": "1. നിന്റെ അനുഭവപ്പെടുന്ന ഇഷ്യൂ റെക്കോർഡുചെയ്യുന്നതിന് താഴെ 'Record Screen' ബട്ടൺ ക്ലിക്കുചെയ്യുക.", + "2. Once you have finished recording the issue, click on the 'Stop Recording' button (the same button, but the label changes depending on whether you are actively recording or not).": "2. ഇഷ്യൂ റെക്കോർഡുചെയ്തുതീർക്കുന്നതിനുശേഷം, 'Stop Recording' ബട്ടൺ ക്ലിക്കുചെയ്യുക (അത് തുടർന്നിരിക്കുന്നുമോ എന്ന് താഴെയോ കാണുന്ന ലേബല്‍ അനുസരിച്ച് മാറുന്നു).", + "3. Go to [GitHub Issues](https://github.com/IAHispano/Applio/issues) and click on the 'New Issue' button.": "3. [GitHub Issues](https://github.com/IAHispano/Applio/issues) സ്ഥലത്തേക്ക് പോകുക, 'New Issue' ബട്ടൺ ക്ലിക്കുചെയ്യുക.", + "4. Complete the provided issue template, ensuring to include details as needed, and utilize the assets section to upload the recorded file from the previous step.": "4. 
നൽകിയ ഇഷ്യൂ ടെംപ്ലേറ്റ് പൂർത്തിയാക്കുക, ആവശ്യമായ വിവരങ്ങളെ ചേർക്കുന്നതിനുശേഷം, പഴയ ഘടനയിൽ റെക്കോർഡുചെയ്ത ഫയൽ അപ്‌ലോഡ് ചെയ്യുന്നതിന് എസെറ്റ് വിഭാഗം ഉപയോഗിക്കുക.", + "Record Screen": "റെക്കോർഡ് സ്‌ക്രീൻ", + "Record": "റെക്കോർഡ്", + "Stop Recording": "റെക്കോർഡുനിർത്തുക", + "Introduce the model .pth path": "മോഡൽ .pth പാത പരിചയപ്പെടുക", + "See Model Information": "മോഡൽ വിവരങ്ങൾ കാണുക", + "## Download Model": "## മോഡൽ ഡൗൺലോഡ്", + "Model Link": "മോഡൽ ലിങ്ക്", + "Introduce the model link": "മോഡൽ ലിങ്ക് പരിചയപ്പെടുക", + "Download Model": "മോഡൽ ഡൗൺലോഡ്", + "## Drop files": "## ഫയലുകൾ ഇടുക", + "## Search Model": "## മോഡൽ തിരയൽ", + "Search": "തിരയൽ", + "Introduce the model name to search.": "തിരയുന്നതിനായി മോഡൽ പേര് അറിയിക്കുക.", + "We couldn't find models by that name.": "അനുബന്ധമായ പേരിൽ മോഡൽസ് കണ്ടെത്താനായില്ല.", + + "Drag your .pth file and .index file into this space. Drag one and then the other.": "നിനക്ക് .pth ഫയലുകളും .index ഫയലുകളും ഇവിടെ ഡ്രാഗ് ചെയ്യുക. ഒന്നുകിട്ട് പിന്നെ മറ്റൊന്നു ഡ്രാഗ് ചെയ്യുക.", + "TTS Voices": "TTS വോയ്സുകൾ", + "Select the TTS voice to use for the conversion.": "മാറ്റത്തിനായി ഉപയോഗിക്കാൻ TTS വോയ്സ് തിരഞ്ഞെടുക്കുക.", + "Text to Synthesize": "സിന്തിയസൈസ് ചെയ്യുന്ന ടെക്സ്റ്റ്", + "Enter the text to synthesize.": "സിന്തിയസൈസ് ചെയ്യാൻ ടെക്സ്റ്റ് നൽകുക.", + "Or you can upload a .txt file": "അല്ലെങ്കിൽ .txt ഫയൽ അപ്‌ലോഡ് ചെയ്യാം", + "Enter text to synthesize": "സിന്തിയസൈസ് ചെയ്യാൻ ടെക്സ്റ്റ് നൽകുക", + "Output Path for TTS Audio": "TTS ഓഡിയോക്ക് ഔട്ട്പുട്ട് പാത", + "Output Path for RVC Audio": "RVC ഓഡിയോക്ക് ഔട്ട്പുട്ട് പാത", + "Enable Applio integration with Discord presence": "Discord പ്രസന്നതയോട് Applio ഇൻറഗ്രേഷൻ സജീവമാക്കുക", + "It will activate the possibility of displaying the current Applio activity in Discord.": "ഇത് Discord-നായിരിക്കുന്ന നിലാവ് കാണാനുള്ള സാധ്യത സജീവമാക്കും.", + "Enable Applio integration with applio.org/models using flask": "flask ഉപയോഗിച്ച് applio.org/models ഇൻറഗ്രേഷൻ Applio സജീവമാക്കുക", + "It will activate the possibility of 
downloading models with a click from the website.": "ഇത് വെബ്സൈറ്റിൽ ഒരു ക്ലിക്ക് ചെയ്യുമ്പോൾ മോഡലുകൾ ഡൗൺലോഡ് ചെയ്യാനുള്ള സാധ്യത സജീവമാക്കും.", + "Enable fake GPU": "വഞ്ചി ജിപിയു ഇയക്കുക", + "Training is currently unsupported due to the absence of a GPU. To activate the training tab, navigate to the settings tab and enable the 'Fake GPU' option.": "പ്രശിക്ഷണം തറന്ന് നിലവിലുള്ളതിന് ജിപിയു ഇല്ലാതെ പ്രസ്താവിക്കുന്നതിനായി തിരഞ്ഞെടുക്കുന്നത് അനുവദിക്കാൻ 'ഫെയ്ക് ജിപിയു' ഓപ്ഷൻ സജ്ജമാക്കുക എന്ന് ക്രമീകരിക്കാൻ തിരിച്ചുവരുക. ", + "Activates the train tab. However, please note that this device lacks GPU capabilities, hence training is not supported. This option is only for testing purposes. (This option will restart Applio)": "പ്രശിക്ഷണം തുടങ്ങുന്നു. എങ്കിലും, ദയവായി ശ്രദ്ധിക്കുക എന്നത് നിങ്ങളുടെ ഉപകരണത്തിൽ GPU സാധ്യതകൾ ഇല്ലാത്തതാണ്, അതിനാൽ പ്രശിക്ഷണം അനുവദനീയമല്ല. ഈ ഓപ്ഷൻ ഇപ്പോൾ പരീക്ഷണങ്ങളിക്കായാണ്. (ഈ ഓപ്ഷൻ അപ്ലിയോ പുനഃസജ്ജമാക്കും)", + "Theme": "തീം", + "Select the theme you want to use. (Requires restarting Applio)": "നിനക്ക് ഉപയോഗിക്കാൻ എന്താണെന്ന് നിങ്ങളുടെ തീം തിരഞ്ഞെടുക്കുക. (Applio പുനഃസജീവമാക്കാൻ ആവശ്യമാണ്)", + "Language": "ഭാഷ", + "Select the language you want to use. (Requires restarting Applio)": "നിങ്ങളുടെ ഉപയോഗത്തിന് നിങ്ങളുടെ ഭാഷ തിരഞ്ഞെടുക്കുക. 
(Applio പുനഃസജീവമാക്കാൻ ആവശ്യമാണ്)", + "Plugin Installer": "പ്ലഗിൻ ഇൻസ്റ്റാൾലർ", + "Drag your plugin.zip to install it": "അതിനായി നിനക്ക് നിന്നെത്തിയ .zip ഫയല്‍ ഇൻസ്റ്റാൾ ചെയ്യാൻ ഇവിടെ ഡ്രാഗ് ചെയ്യുക", + "Version Checker": "വേര്‍ഷന്‍ ചെക്കർ", + "Check which version of Applio is the latest to see if you need to update.": "നിനക്ക് അപ്‌ഡേറ്റുചെയ്യേണ്ടതോ എന്ന് അപ്‌ലിയോയുടെ ഏറ്റവും പുതിയ പതിപ്പായത് പരിശോധിക്കുക.", + "Check for updates": "അപ്‌ഡേറ്റുകൾ പരിശോധിക്കുക" +} diff --git a/assets/i18n/languages/mr_MR.json b/assets/i18n/languages/mr_MR.json index d0b16d3a3b09635c63c5ec0ce3839f88cb8f1ea8..fd5726fa11e1b34cc32648f0279acdfecacb5157 100644 --- a/assets/i18n/languages/mr_MR.json +++ b/assets/i18n/languages/mr_MR.json @@ -1,5 +1,5 @@ { - "Ultimate voice cloning tool, meticulously optimized for unrivaled power, modularity, and user-friendly experience.": "अल्टिमेट व्हॉइस क्लोनिंग टूल, अप्रतिम शक्ती, मॉड्युलरिटी आणि वापरकर्ता-अनुकूल अनुभवासाठी काळजीपूर्वक ऑप्टिमाइझ केलेले.", + "VITS-based Voice Conversion focused on simplicity, quality and performance.": "अल्टिमेट व्हॉइस क्लोनिंग टूल, अप्रतिम शक्ती, मॉड्युलरिटी आणि वापरकर्ता-अनुकूल अनुभवासाठी काळजीपूर्वक ऑप्टिमाइझ केलेले.", "This section contains some extra utilities that often may be in experimental phases.": "या विभागात काही अतिरिक्त उपयोगिता आहेत ज्या बर्याचदा प्रायोगिक टप्प्यात असू शकतात.", "Output Information": "आउटपुट माहिती", "The output information will be displayed here.": "आउटपुट माहिती येथे प्रदर्शित केली जाईल.", @@ -30,8 +30,8 @@ "Enter dataset path": "डेटासेट मार्ग प्रविष्ट करा", "Sampling Rate": "नमुना दर", "The sampling rate of the audio files.": "ऑडिओ फायलींचे नमुने घेण्याचा दर.", - "RVC Version": "आरव्हीसी आवृत्ती", - "The RVC version of the model.": "मॉडेलची आरव्हीसी आवृत्ती.", + "Model Architecture": "आरव्हीसी आवृत्ती", + "Version of the model architecture.": "मॉडेलची आरव्हीसी आवृत्ती.", "Preprocess Dataset": "Preprocess Dataset", "Extract": "अर्क", "Hop Length": "हॉप लांबी", diff --git 
a/assets/i18n/languages/ms_MS.json b/assets/i18n/languages/ms_MS.json index c8b46d94635c7f2dfc29189903639ddfb133a810..4c89bed5a31a202bb9ffb14bda4d774ea190d64a 100644 --- a/assets/i18n/languages/ms_MS.json +++ b/assets/i18n/languages/ms_MS.json @@ -1,5 +1,5 @@ { - "Ultimate voice cloning tool, meticulously optimized for unrivaled power, modularity, and user-friendly experience.": "Alat pengklonan suara muktamad, dioptimumkan dengan teliti untuk kuasa yang tiada tandingan, modulariti, dan pengalaman mesra pengguna.", + "VITS-based Voice Conversion focused on simplicity, quality and performance.": "Alat pengklonan suara muktamad, dioptimumkan dengan teliti untuk kuasa yang tiada tandingan, modulariti, dan pengalaman mesra pengguna.", "This section contains some extra utilities that often may be in experimental phases.": "Bahagian ini mengandungi beberapa utiliti tambahan yang selalunya berada dalam fasa percubaan.", "Output Information": "Maklumat Output", "The output information will be displayed here.": "Maklumat output akan dipaparkan di sini.", @@ -30,8 +30,8 @@ "Enter dataset path": "Memasukkan laluan set data", "Sampling Rate": "Kadar Persampelan", "The sampling rate of the audio files.": "Kadar pensampelan fail audio.", - "RVC Version": "Versi RVC", - "The RVC version of the model.": "Versi RVC model.", + "Model Architecture": "Versi RVC", + "Version of the model architecture.": "Versi RVC model.", "Preprocess Dataset": "Set Data Praproses", "Extract": "Cabutan", "Hop Length": "Panjang Hop", diff --git a/assets/i18n/languages/nl_NL.json b/assets/i18n/languages/nl_NL.json index 16692bbc0d4137c880ae4c5ac8659f2866f923af..938a719a7f913cc88345d296bf8dc4bcaf5fd41b 100644 --- a/assets/i18n/languages/nl_NL.json +++ b/assets/i18n/languages/nl_NL.json @@ -1,5 +1,5 @@ { - "Ultimate voice cloning tool, meticulously optimized for unrivaled power, modularity, and user-friendly experience.": "Ultieme tool voor het klonen van stemmen, zorgvuldig geoptimaliseerd voor 
ongeëvenaarde kracht, modulariteit en gebruiksvriendelijke ervaring.", + "VITS-based Voice Conversion focused on simplicity, quality and performance.": "Ultieme tool voor het klonen van stemmen, zorgvuldig geoptimaliseerd voor ongeëvenaarde kracht, modulariteit en gebruiksvriendelijke ervaring.", "This section contains some extra utilities that often may be in experimental phases.": "Deze sectie bevat enkele extra hulpprogramma's die zich vaak in experimentele fasen bevinden.", "Output Information": "Output Informatie", "The output information will be displayed here.": "De uitvoerinformatie wordt hier weergegeven.", @@ -30,8 +30,8 @@ "Enter dataset path": "Pad naar gegevensset invoeren", "Sampling Rate": "Bemonsteringsfrequentie", "The sampling rate of the audio files.": "De bemonsteringsfrequentie van de audiobestanden.", - "RVC Version": "RVC-versie", - "The RVC version of the model.": "De RVC-versie van het model.", + "Model Architecture": "RVC-versie", + "Version of the model architecture.": "De RVC-versie van het model.", "Preprocess Dataset": "Gegevensset voor het proces", "Extract": "Extract", "Hop Length": "Hop Lengte", diff --git a/assets/i18n/languages/pa_PA.json b/assets/i18n/languages/pa_PA.json index acd58f85baefb402384ccdca2a0cf6912bca4842..21199db597a7785acf08968714cb4d5808844a8c 100644 --- a/assets/i18n/languages/pa_PA.json +++ b/assets/i18n/languages/pa_PA.json @@ -1,5 +1,5 @@ { - "Ultimate voice cloning tool, meticulously optimized for unrivaled power, modularity, and user-friendly experience.": "ਅੰਤਮ ਵੌਇਸ ਕਲੋਨਿੰਗ ਟੂਲ, ਬੇਮਿਸਾਲ ਸ਼ਕਤੀ, ਮਾਡਿਊਲਰਿਟੀ, ਅਤੇ ਉਪਭੋਗਤਾ-ਅਨੁਕੂਲ ਅਨੁਭਵ ਲਈ ਧਿਆਨ ਨਾਲ ਅਨੁਕੂਲ ਬਣਾਇਆ ਗਿਆ ਹੈ.", + "VITS-based Voice Conversion focused on simplicity, quality and performance.": "ਅੰਤਮ ਵੌਇਸ ਕਲੋਨਿੰਗ ਟੂਲ, ਬੇਮਿਸਾਲ ਸ਼ਕਤੀ, ਮਾਡਿਊਲਰਿਟੀ, ਅਤੇ ਉਪਭੋਗਤਾ-ਅਨੁਕੂਲ ਅਨੁਭਵ ਲਈ ਧਿਆਨ ਨਾਲ ਅਨੁਕੂਲ ਬਣਾਇਆ ਗਿਆ ਹੈ.", "This section contains some extra utilities that often may be in experimental phases.": "ਇਸ ਭਾਗ ਵਿੱਚ ਕੁਝ ਵਾਧੂ ਉਪਯੋਗਤਾਵਾਂ ਹਨ ਜੋ ਅਕਸਰ ਪ੍ਰਯੋਗਾਤਮਕ ਪੜਾਵਾਂ ਵਿੱਚ ਹੋ 
ਸਕਦੀਆਂ ਹਨ।", "Output Information": "ਆਊਟਪੁੱਟ ਜਾਣਕਾਰੀ", "The output information will be displayed here.": "ਆਉਟਪੁੱਟ ਜਾਣਕਾਰੀ ਇੱਥੇ ਪ੍ਰਦਰਸ਼ਿਤ ਕੀਤੀ ਜਾਵੇਗੀ।", @@ -30,8 +30,8 @@ "Enter dataset path": "ਡਾਟਾਸੈਟ ਪਾਥ ਦਾਖਲ ਕਰੋ", "Sampling Rate": "ਨਮੂਨੇ ਲੈਣ ਦੀ ਦਰ", "The sampling rate of the audio files.": "ਆਡੀਓ ਫਾਇਲਾਂ ਦੀ ਨਮੂਨੇ ਲੈਣ ਦੀ ਦਰ।", - "RVC Version": "RVC ਸੰਸਕਰਣ", - "The RVC version of the model.": "ਮਾਡਲ ਦਾ ਆਰਵੀਸੀ ਸੰਸਕਰਣ.", + "Model Architecture": "RVC ਸੰਸਕਰਣ", + "Version of the model architecture.": "ਮਾਡਲ ਦਾ ਆਰਵੀਸੀ ਸੰਸਕਰਣ.", "Preprocess Dataset": "ਪ੍ਰੀਪ੍ਰੋਸੈਸ ਡੇਟਾਸੈਟ", "Extract": "ਐਕਸਟਰੈਕਟ", "Hop Length": "ਹੌਪ ਲੰਬਾਈ", diff --git a/assets/i18n/languages/pl_PL.json b/assets/i18n/languages/pl_PL.json index e61bd25277dc2538752f20b75ee7b3645a89f797..09c14ee62c9b34a0fad157495c4845d881accd54 100644 --- a/assets/i18n/languages/pl_PL.json +++ b/assets/i18n/languages/pl_PL.json @@ -1,5 +1,5 @@ { - "Ultimate voice cloning tool, meticulously optimized for unrivaled power, modularity, and user-friendly experience.": "Najlepsze narzędzie do klonowania głosu, skrupulatnie zoptymalizowane pod kątem niezrównanej mocy, modułowości i przyjazności dla użytkownika.", + "VITS-based Voice Conversion focused on simplicity, quality and performance.": "Najlepsze narzędzie do klonowania głosu, skrupulatnie zoptymalizowane pod kątem niezrównanej mocy, modułowości i przyjazności dla użytkownika.", "This section contains some extra utilities that often may be in experimental phases.": "Ta sekcja zawiera kilka dodatkowych narzędzi, które często mogą znajdować się w fazie eksperymentalnej.", "Output Information": "Informacje wyjściowe", "The output information will be displayed here.": "W tym miejscu zostaną wyświetlone informacje wyjściowe.", @@ -30,8 +30,8 @@ "Enter dataset path": "Wprowadź ścieżkę zestawu danych", "Sampling Rate": "Częstotliwość próbkowania", "The sampling rate of the audio files.": "Częstotliwość próbkowania plików audio.", - "RVC Version": "Wersja RVC", - "The RVC version of the 
model.": "Wersja modelu RVC.", + "Model Architecture": "Wersja RVC", + "Version of the model architecture.": "Wersja modelu RVC.", "Preprocess Dataset": "Wstępne przetwarzanie zestawu danych", "Extract": "Ekstrakt", "Hop Length": "Długość chmielu", diff --git a/assets/i18n/languages/pt_BR.json b/assets/i18n/languages/pt_BR.json index 180b4a485cf83163b1cbc2c2e3c41bb5c6b6a9d0..d61efdf04d18a231dedb159a9d7e753191abfc88 100644 --- a/assets/i18n/languages/pt_BR.json +++ b/assets/i18n/languages/pt_BR.json @@ -1,113 +1,308 @@ { - "Ultimate voice cloning tool, meticulously optimized for unrivaled power, modularity, and user-friendly experience.": "A melhor ferramenta de clonagem de voz, meticulosamente otimizada para potência incomparável, modularidade e experiência amigável.", - "This section contains some extra utilities that often may be in experimental phases.": "Esta seção contém alguns utilitários extras que muitas vezes podem estar em fases experimentais.", - "Output Information": "Informações de saída", + "VITS-based Voice Conversion focused on simplicity, quality and performance.": "Conversão de Voz baseada em VITS focada em simplicidade, qualidade e desempenho.", + "This section contains some extra utilities that often may be in experimental phases.": "Esta seção contém algumas utilidades extras que muitas vezes podem estar em fases experimentais.", + "Output Information": "Informações de Saída", + "The output information will be displayed here.": "As informações de saída serão exibidas aqui.", "Inference": "Inferência", "Train": "Treinar", "Extra": "Extra", - "Merge Audios": "Mesclar áudios", - "Processing": "Processamento", - "Audio Analyzer": "Analisador de áudio", - "Model Information": "Informação do modelo", + "Merge Audios": "Mesclar Áudios", + "Processing": "Processando", + "Audio Analyzer": "Analisador de Áudio", + "Model Information": "Informações do Modelo", + "Plugins": "Plugins", "Download": "Baixar", "Report a Bug": "Reportar um Bug", - "Preprocess": 
"Pré-processo", - "Model Name": "Nome do modelo", - "Enter model name": "Insira o nome do modelo", + "Settings": "Configurações", + "Preprocess": "Pré-processar", + "Audio cutting": "Corte de Áudio", + "It's recommended to deactivate this option if your dataset has already been processed.": "Recomenda-se desativar esta opção se seu dataset já foi processado.", + "Process effects": "Processar efeitos", + "Model Name": "Nome do Modelo", + "Name of the new model.": "Nome do novo modelo.", + "Enter model name": "Digite o nome do modelo", "Dataset Path": "Caminho do dataset", - "Enter dataset path": "Insira o caminho do dataset", - "Sampling Rate": "Taxa de amostragem", - "RVC Version": "Versão RVC", + "Path to the dataset folder.": "Caminho para a pasta do dataset.", + "Refresh Datasets": "Atualizar datasets", + "Dataset Creator": "Criador de dataset", + "Dataset Name": "Nome do dataset", + "Name of the new dataset.": "Nome do novo dataset.", + "Enter dataset name": "Digite o nome do dataset", + "Upload Audio Dataset": "Carregar dataset de Áudio", + "The audio file has been successfully added to the dataset. Please click the preprocess button.": "O arquivo de áudio foi adicionado com sucesso ao dataset. 
Por favor, clique no botão de pré-processamento.", + "Enter dataset path": "Digite o caminho do dataset", + "Sampling Rate": "Sampling Rate", + "The sampling rate of the audio files.": "O sampling rate dos arquivos de áudio.", + "Model Architecture": "Versão do RVC", + "Version of the model architecture.": "A versão do RVC do modelo.", "Preprocess Dataset": "Pré-processar dataset", + "Embedder Model": "Modelo de Embedding", + "Model used for learning speaker embedding.": "Modelo usado para aprender a incorporação do locutor.", "Extract": "Extrair", - "Hop Length": "Comprimento do Hop", - "Batch Size": "Tamanho do lote", - "Save Every Epoch": "Salve Cada Epoch", - "Total Epoch": "Epoch Total", - "Pretrained": "Pré-treinamento", - "Save Only Latest": "Salvar Apenas o último", - "Save Every Weights": "Salvar todos os Weights", - "Custom Pretrained": "Pretrain personalizado", - "Upload Pretrained Model": "Carregar Pretrain", - "Pretrained Custom Settings": "Configurações personalizadas do pretrain", - "The file you dropped is not a valid pretrained file. Please try again.": "O arquivo que você soltou não é um arquivo de pretrain válido. Por favor, tente novamente.", - "Click the refresh button to see the pretrained file in the dropdown menu.": "Clique no botão Atualizar para ver o arquivo pretrain no menu suspenso.", - "Pretrained G Path": "Personalizado Pré-treinado G", - "Pretrained D Path": "Personalizado Pré-treinado D", - "GPU Settings": "Configurações da GPU", - "GPU Custom Settings": "Configurações personalizadas da GPU", + "Hop Length": "Hop Length", + "Denotes the duration it takes for the system to transition to a significant pitch change. Smaller hop lengths require more time for inference but tend to yield higher pitch accuracy.": "Denota a duração que o sistema leva para transitar para uma mudança significativa de tom. 
Comprimentos de salto menores requerem mais tempo para inferência, mas tendem a proporcionar maior precisão de tom.", + "Batch Size": "Tamanho do Lote", + "It's advisable to align it with the available VRAM of your GPU. A setting of 4 offers improved accuracy but slower processing, while 8 provides faster and standard results.": "É aconselhável alinhá-lo com a VRAM disponível da sua GPU. Uma configuração de 4 oferece maior precisão, mas processamento mais lento, enquanto 8 proporciona resultados mais rápidos e padrão.", + "Save Every Epoch": "Salvar a Cada Epochs", + "Determine at how many epochs the model will saved at.": "Determine em quantas epochs o modelo será salvo.", + "Total Epoch": "Total de Epochs", + "Specifies the overall quantity of epochs for the model training process.": "Especifica a quantidade total de epochs para o processo de treinamento do modelo.", + "Pretrained": "Pré-treino", + "Save Only Latest": "Salvar Apenas o Mais Recente", + "Enabling this setting will result in the G and D files saving only their most recent versions, effectively conserving storage space.": "Ativar esta configuração fará com que os arquivos G e D salvem apenas suas versões mais recentes, economizando espaço de armazenamento.", + "Save Every Weights": "Salvar Todos os Arquivos de modelo", + "This setting enables you to save the weights of the model at the conclusion of each epoch.": "Esta configuração permite salvar os arquivos de modelos ao final de cada epoch.", + "Custom Pretrained": "Pré-treino Personalizado", + "Utilizing custom pretrained models can lead to superior results, as selecting the most suitable pretrained models tailored to the specific use case can significantly enhance performance.": "Utilizar modelos Pré-treino personalizados pode levar a resultados superiores, pois selecionar os modelos Pré-treino mais adequados para o caso específico pode melhorar significativamente o desempenho.", + "Upload Pretrained Model": "Carregar Modelo Pré-treino", + 
"Refresh Custom Pretraineds": "Atualizar Pré-treino Personalizados", + "Pretrained Custom Settings": "Configurações Personalizadas de Pré-treino", + "The file you dropped is not a valid pretrained file. Please try again.": "O arquivo que você soltou não é um arquivo Pré-treino válido. Por favor, tente novamente.", + "Click the refresh button to see the pretrained file in the dropdown menu.": "Clique no botão de atualizar para ver o arquivo Pré-treino no menu suspenso.", + "Pretrained G Path": "Caminho do Pré-treino G", + "Pretrained D Path": "Caminho do Pré-treino D", + "GPU Settings": "Configurações de GPU", + "Sets advanced GPU settings, recommended for users with better GPU architecture.": "Define configurações avançadas de GPU, recomendadas para usuários com melhor arquitetura de GPU.", + "GPU Custom Settings": "Configurações Personalizadas de GPU", "GPU Number": "Número da GPU", "0 to ∞ separated by -": "0 a ∞ separados por -", + "The GPU information will be displayed here.": "As informações da GPU serão exibidas aqui.", + "Specify the number of GPUs you wish to utilize for preprocess by entering them separated by hyphens (-). At the moment, using multi-gpu will not have a significant effect.": "Especifique o número de GPUs que você deseja utilizar para pré-processamento, inserindo-os separados por hífens (-). No momento, usar várias GPUs não terá um efeito significativo.", + "Specify the number of GPUs you wish to utilize for extracting by entering them separated by hyphens (-).": "Especifique o número de GPUs que você deseja utilizar para extração, inserindo-os separados por hífens (-).", + "The number of CPU cores to use in the preprocess. The default setting are your cpu cores, which is recommended for most cases.": "O número de núcleos de CPU a serem usados no pré-processamento. A configuração padrão são seus núcleos de CPU, o que é recomendado para a maioria dos casos.", + "The number of CPU cores to use in the extraction process. 
The default setting are your cpu cores, which is recommended for most cases.": "O número de núcleos de CPU a serem usados no processo de extração. A configuração padrão são seus núcleos de CPU, o que é recomendado para a maioria dos casos.", "GPU Information": "Informações da GPU", - "Pitch Guidance": "Orientação de Pitch", - "Extract Features": "Extrair recursos", + "Pitch Guidance": "Orientação de Tom", + "By employing pitch guidance, it becomes feasible to mirror the intonation of the original voice, including its pitch. This feature is particularly valuable for singing and other scenarios where preserving the original melody or pitch pattern is essential.": "Ao empregar a orientação de tom, torna-se viável espelhar a entonação da voz original, incluindo seu tom. Este recurso é particularmente valioso para canto e outros cenários onde preservar a melodia ou padrão de tom original é essencial.", + "Utilize pretrained models when training your own. This approach reduces training duration and enhances overall quality.": "Utilize modelos Pré-treino ao treinar o seu próprio. Esta abordagem reduz a duração do treinamento e melhora a qualidade geral.", + "Extract Features": "Extrair Recursos", + "We prioritize running the model extraction on the GPU for faster performance. If you prefer to use the CPU, simply leave the GPU field blank.": "Priorizamos a execução da extração do modelo na GPU para um desempenho mais rápido. Se você preferir usar a CPU, simplesmente deixe o campo da GPU em branco.", + "We prioritize running the model preprocessing on the GPU for faster performance. If you prefer to use the CPU, simply leave the GPU field blank.": "Priorizamos a execução do pré-processamento do modelo na GPU para um desempenho mais rápido.
Se você preferir usar a CPU, simplesmente deixe o campo da GPU em branco.", + "Overtraining Detector": "Detector de Overtrain", + "Detect overtraining to prevent the model from learning the training data too well and losing the ability to generalize to new data.": "Detecte o Overtrain para evitar que o modelo aprenda os dados de treinamento muito bem e perca a capacidade de generalizar para novos dados.", + "Overtraining Detector Settings": "Configurações do Detector de Overtrain", + "Overtraining Threshold": "Limite de Overtrain", + "Set the maximum number of epochs you want your model to stop training if no improvement is detected.": "Defina o número máximo de épocas que você deseja que seu modelo pare de treinar se nenhuma melhoria for detectada.", + "Sync Graph": "Sincronizar Gráfico", + "Synchronize the graph of the tensorboard. Only enable this setting if you are training a new model.": "Sincronize o gráfico do tensorboard. Ative esta configuração apenas se você estiver treinando um novo modelo.", "Start Training": "Iniciar Treinamento", - "Generate Index": "Gerar Index", - "Voice Model": "Modelo de voz", - "Index File": "Arquivo de Index", + "Stop Training": "Parar Treinamento", + "Generate Index": "Gerar index", + "Export Model": "Exportar Modelo", + "The button 'Upload' is only for google colab: Uploads the exported files to the ApplioExported folder in your Google Drive.": "O botão 'Carregar' é apenas para google colab: Carrega os arquivos exportados para a pasta ApplioExported no seu Google Drive.", + "Exported Pth file": "Arquivo Pth Exportado", + "Exported Index file": "Arquivo de index Exportado", + "Select the pth file to be exported": "Selecione o arquivo pth a ser exportado", + "Select the index file to be exported": "Selecione o arquivo de index a ser exportado", + "Upload": "Carregar", + "Voice Model": "Modelo de Voz", + "Select the voice model to use for the conversion.": "Selecione o modelo de voz a ser usado para a conversão.", + "Index File": 
"Arquivo de index", + "Select the index file to use for the conversion.": "Selecione o arquivo de índice a ser usado para a conversão.", "Refresh": "Atualizar", - "Unload Voice": "Descarregar voz", - "Single": "Único", - "Upload Audio": "Carregar áudio", - "Select Audio": "Selecione Áudio", - "Advanced Settings": "Configurações avançadas", - "Clear Outputs (Deletes all audios in assets/audios)": "Limpar saídas (exclui todos os áudios em ativos/áudios)", - "Custom Output Path": "Caminho de saída personalizado", - "Output Path": "Caminho de saída", + "Unload Voice": "Descarregar Voz", + "Single": "Um arquivo", + "Upload Audio": "Carregar Áudio", + "Select Audio": "Selecionar Áudio", + "Select the audio to convert.": "Selecione o áudio a ser convertido.", + "Advanced Settings": "Configurações Avançadas", + "Clear Outputs (Deletes all audios in assets/audios)": "Limpar Saídas (Exclui todos os áudios em assets/audios)", + "Custom Output Path": "Caminho de Saída Personalizado", + "Output Path": "Caminho de Saída", + "The path where the output audio will be saved, by default in assets/audios/output.wav": "O caminho onde o áudio de saída será salvo, por padrão em assets/audios/output.wav", + "Split Audio": "Dividir Áudio", + "Split the audio into chunks for inference to obtain better results in some cases.": "Divida o áudio em partes para inferência para obter melhores resultados em alguns casos.", + "Autotune": "Autotune", + "Apply a soft autotune to your inferences, recommended for singing conversions.": "Aplique um autotune suave às suas inferências, recomendado para conversões de canto.", + "Clean Audio": "Limpar Áudio", + "Clean your audio output using noise detection algorithms, recommended for speaking audios.": "Limpe sua saída de áudio usando algoritmos de detecção de ruído, recomendado para áudios de fala.", + "Clean Strength": "Força de Limpeza", + "Upscale Audio": "Upscale Áudio", + "Upscale the audio to a higher quality, recommended for low-quality audios. 
(It could take longer to process the audio)": "Aprimore o áudio para uma qualidade superior, recomendado para áudios de baixa qualidade. (Pode demorar mais para processar o áudio)", + "Set the clean-up level to the audio you want, the more you increase it the more it will clean up, but it is possible that the audio will be more compressed.": "Defina o nível de limpeza para o áudio desejado, quanto mais você aumentar, mais ele será limpo, mas é possível que o áudio fique mais comprimido.", + "Formant Shifting": "Mudança de Formante", + "Enable formant shifting. Used for male to female and vice-versa convertions.": "Ative a mudança de formante. Usado para conversões de masculino para feminino e vice-versa.", + "Browse presets for formanting": "Procurar predefinições para formanting", + "Presets are located in /assets/formant_shift folder": "As predefinições estão localizadas na pasta /assets/formant_shift", + "Default value is 1.0": "O valor padrão é 1.0", + "Quefrency for formant shifting": "Quefrência para mudança de formante", + "Timbre for formant shifting": "Timbre para mudança de formante", "Pitch": "Pitch", - "If >=3: apply median filtering to the harvested pitch results. The value represents the filter radius and can reduce breathiness": "Se >=3: aplicar filtragem mediana aos resultados do pitch colhido. 
O valor representa o raio do filtro e pode reduzir a soprosidade", - "Search Feature Ratio": "Proporção de recursos de Index", - "Pitch extraction algorithm": "Algoritmo de extração de pitch", + "Set the pitch of the audio, the higher the value, the higher the pitch.": "Defina o Pitch do áudio, quanto maior o valor, mais alto será o Pitch.", + "Filter Radius": "Filter Radius", + "If the number is greater than or equal to three, employing median filtering on the collected tone results has the potential to decrease respiration.": "Se o número for maior ou igual a três, empregar filtragem mediana nos resultados do tom coletado tem o potencial de diminuir a respiração.", + "Search Feature Ratio": "Proporção de Recurso de Pesquisa", + "Influence exerted by the index file; a higher value corresponds to greater influence. However, opting for lower values can help mitigate artifacts present in the audio.": "Influência exercida pelo arquivo de índice; um valor mais alto corresponde a uma maior influência. No entanto, optar por valores mais baixos pode ajudar a mitigar artefatos presentes no áudio.", + "Volume Envelope": "Envelope de Volume", + "Substitute or blend with the volume envelope of the output. The closer the ratio is to 1, the more the output envelope is employed.": "Substitua ou misture com o envelope de volume da saída. Quanto mais próximo o valor estiver de 1, mais o envelope de saída será empregado.", + "Protect Voiceless Consonants": "Proteger Consoantes Surdas", + "Safeguard distinct consonants and breathing sounds to prevent electro-acoustic tearing and other artifacts. Pulling the parameter to its maximum value of 0.5 offers comprehensive protection. However, reducing this value might decrease the extent of protection while potentially mitigating the indexing effect.": "Proteja consoantes distintas e sons de respiração para evitar rasgos eletroacústicos e outros artefatos. Puxar o parâmetro para seu valor máximo de 0,5 oferece proteção abrangente. 
No entanto, reduzir esse valor pode diminuir a extensão da proteção, enquanto potencialmente mitiga o efeito de indexação.", + "Pitch extraction algorithm": "Algoritmo de extração de Pitch", + "Pitch extraction algorithm to use for the audio conversion. The default algorithm is rmvpe, which is recommended for most cases.": "Algoritmo de extração de Pitch a ser usado para a conversão de áudio. O algoritmo padrão é rmvpe, que é recomendado para a maioria dos casos.", "Convert": "Converter", - "Export Audio": "Exportar áudio", - "Batch": "Lote", - "Input Folder": "Pasta de entrada", - "Enter input path": "Insira o caminho de entrada", - "Output Folder": "Pasta de saída", - "Enter output path": "Insira o caminho de saída", + "Export Audio": "Exportar Áudio", + "Batch": "Vários arquivos", + "Input Folder": "Pasta de Entrada", + "Select the folder containing the audios to convert.": "Selecione a pasta contendo os áudios a serem convertidos.", + "Enter input path": "Digite o caminho de entrada", + "Output Folder": "Pasta de Saída", + "Select the folder where the output audios will be saved.": "Selecione a pasta onde os áudios de saída serão salvos.", + "Enter output path": "Digite o caminho de saída", "Get information about the audio": "Obter informações sobre o áudio", - "Information about the audio file": "Informações sobre o arquivo de áudio", - "Waiting for information...": "À espera de informações...", - "Model fusion": "Fusão de modelos", - "Weight for Model A": "Peso para o modelo A", - "Whether the model has pitch guidance": "Se o modelo tem orientação de pitch", - "Model architecture version": "Versão da arquitetura do modelo", - "Path to Model A": "Caminho para o Modelo A", - "Path to Model B": "Caminho para o Modelo B", - "Path to model": "Caminho para o modelo", - "Model information to be placed": "Modelo de informação a colocar", + "## Voice Blender": "## Fusão de voz", + "Select two voice models, set your desired blend percentage, and blend them into an
entirely new voice.": "Selecione dois modelos de voz, defina a porcentagem de Mix desejada e misture-os em uma nova voz.", + "Voice Blender": "Fusão de voz", + "Drag and drop your model here": "Arraste e solte seu modelo aqui", + "You can also use a custom path.": "Você também pode usar um caminho personalizado.", + "Blend Ratio": "Taxa de Fusão", + "Adjusting the position more towards one side or the other will make the model more similar to the first or second.": "Ajustar a posição mais para um lado ou para o outro tornará o modelo mais semelhante ao primeiro ou ao segundo.", "Fusion": "Fusão", - "Modify model information": "Modificar informações do modelo", - "Path to Model": "Caminho para o modelo", - "Model information to be modified": "Modelo de informação a modificar", - "Save file name": "Guardar nome de ficheiro", - "Modify": "Modificar", + "Path to Model": "Caminho para o Modelo", + "Enter path to model": "Digite o caminho para o modelo", + "Model information to be placed": "Informações do modelo a serem colocadas", + "Inroduce the model information": "Introduza as informações do modelo", + "The information to be placed in the model (You can leave it blank or put anything).": "As informações a serem colocadas no modelo (Você pode deixar em branco ou colocar qualquer coisa).", "View model information": "Ver informações do modelo", - "View": "View", - "Model extraction": "Extração do modelo", + "Introduce the model pth path": "Introduza o caminho do modelo pth", + "View": "Ver", + "Model extraction": "Extração de modelo", "Model conversion": "Conversão de modelo", "Pth file": "Arquivo Pth", "Output of the pth file": "Saída do arquivo pth", - "# How to Report an Issue on GitHub": "# Como relatar um problema no GitHub", - "1. Click on the 'Record Screen' button below to start recording the issue you are experiencing.": "1. Clique no botão 'Gravar tela' abaixo para começar a gravar o problema que você está enfrentando.", - "2. 
Once you have finished recording the issue, click on the 'Stop Recording' button (the same button, but the label changes depending on whether you are actively recording or not).": "2. Depois de terminar de gravar o problema, clique no botão 'Parar gravação' (o mesmo botão, mas a etiqueta muda dependendo se você está gravando ativamente ou não).", + "Extract F0 Curve": "Extrair Curva F0", + "The f0 curve represents the variations in the base frequency of a voice over time, showing how pitch rises and falls.": "A curva f0 representa as variações na frequência base de uma voz ao longo do tempo, mostrando como o tom sobe e desce.", + "# How to Report an Issue on GitHub": "# Como Reportar um Problema no GitHub", + "1. Click on the 'Record Screen' button below to start recording the issue you are experiencing.": "1. Clique no botão 'Gravar Tela' abaixo para começar a gravar o problema que você está enfrentando.", + "2. Once you have finished recording the issue, click on the 'Stop Recording' button (the same button, but the label changes depending on whether you are actively recording or not).": "2. Uma vez que você tenha terminado de gravar o problema, clique no botão 'Parar Gravação' (o mesmo botão, mas o rótulo muda dependendo se você está gravando ativamente ou não).", "3. Go to [GitHub Issues](https://github.com/IAHispano/Applio/issues) and click on the 'New Issue' button.": "3. Vá para [GitHub Issues](https://github.com/IAHispano/Applio/issues) e clique no botão 'New Issue'.", - "4. Complete the provided issue template, ensuring to include details as needed, and utilize the assets section to upload the recorded file from the previous step.": "4. Preencha o modelo de problema fornecido, garantindo incluir detalhes conforme necessário, e utilize a seção de ativos para carregar o arquivo gravado da etapa anterior.", - "Record Screen": "Gravar tela", + "4. 
Complete the provided issue template, ensuring to include details as needed, and utilize the assets section to upload the recorded file from the previous step.": "4. Complete o modelo de problema fornecido, garantindo incluir detalhes conforme necessário, e utilize a seção de ativos para carregar o arquivo gravado na etapa anterior.", + "Record Screen": "Gravar Tela", "Record": "Gravar", - "Stop Recording": "Parar gravação", - "Introduce the model .pth path": "Introduza o caminho .pth do modelo", - "See Model Information": "Consulte as informações do modelo", + "Stop Recording": "Parar Gravação", + "Introduce the model .pth path": "Introduza o caminho do modelo .pth", + "See Model Information": "Ver Informações do Modelo", "## Download Model": "## Baixar Modelo", - "Model Link": "Link do modelo", + "Model Link": "Link do Modelo", "Introduce the model link": "Introduza o link do modelo", "Download Model": "Baixar Modelo", "## Drop files": "## Soltar arquivos", - "Drag your .pth file and .index file into this space. Drag one and then the other.": "Arraste o arquivo .pth e o arquivo .index para este espaço. Arraste um e depois o outro.", + "Drag your .pth file and .index file into this space. Drag one and then the other.": "Arraste seu arquivo .pth e arquivo .index para este espaço. 
Arraste um e depois o outro.", + "## Search Model": "## Buscar Modelo", + "Search": "Buscar", + "Introduce the model name to search.": "Introduza o nome do modelo para buscar.", + "We couldn't find models by that name.": "Não conseguimos encontrar modelos com esse nome.", + "## Download Pretrained Models": "## Baixar Modelos Pré-treinados", + "Select the pretrained model you want to download.": "Selecione o modelo pré-treino que você deseja baixar.", + "And select the sampling rate": "E selecione a taxa de amostragem.", "TTS Voices": "Vozes TTS", - "Text to Synthesize": "Texto a sintetizar", - "Enter text to synthesize": "Insira texto para sintetizar", - "Output Path for TTS Audio": "Caminho de saída para áudio TTS", - "Output Path for RVC Audio": "Caminho de saída para áudio RVC", - "Enable Applio integration with Discord presence": "Presença do Applio" + "TTS Speed": "Velocidade TTS", + "Increase or decrease TTS speed.": "Aumentar ou diminuir a velocidade do TTS.", + "Select the TTS voice to use for the conversion.": "Selecione a voz TTS a ser usada para a conversão.", + "Text to Synthesize": "Texto para Sintetizar", + "Enter the text to synthesize.": "Digite o texto para sintetizar.", + "Or you can upload a .txt file": "Ou você pode carregar um arquivo .txt", + "Enter text to synthesize": "Digite o texto para sintetizar", + "Output Path for TTS Audio": "Caminho de Saída para Áudio do TTS", + "Output Path for RVC Audio": "Caminho de Saída para Áudio do RVC", + "Enable Applio integration with Discord presence": "Ativar integração do Applio com presença no Discord", + "It will activate the possibility of displaying the current Applio activity in Discord.": "Isso ativará a possibilidade de exibir a atividade atual do Applio no Discord.", + "Enable Applio integration with applio.org/models using flask": "Ativar integração do Applio com applio.org/models usando flask", + "It will activate the possibility of downloading models with a click from the website.": "Isso 
ativará a possibilidade de baixar modelos com um clique no site.", + "Enable fake GPU": "Ativar GPU falsa", + "Training is currently unsupported due to the absence of a GPU. To activate the training tab, navigate to the settings tab and enable the 'Fake GPU' option.": "O treinamento atualmente não é suportado devido à ausência de uma GPU. Para ativar a aba de treinamento, navegue até a aba de configurações e ative a opção 'GPU Falsa'.", + "Activates the train tab. However, please note that this device lacks GPU capabilities, hence training is not supported. This option is only for testing purposes. (This option will restart Applio)": "Ativa a aba de treinamento. No entanto, observe que este dispositivo não possui capacidades de GPU, portanto, o treinamento não é suportado. Esta opção é apenas para fins de teste. (Esta opção reiniciará o Applio)", + "Theme": "Tema", + "Select the theme you want to use. (Requires restarting Applio)": "Selecione o tema que você deseja usar. (Requer reiniciar o Applio)", + "Language": "Idioma", + "Select the language you want to use. (Requires restarting Applio)": "Selecione o idioma que você deseja usar. 
(Requer reiniciar o Applio)", + "Precision": "Precisão", + "Select the precision you want to use for training and inference.": "Selecione a precisão que você deseja usar para treinamento e inferência.", + "Update precision": "Atualizar precisão", + "Plugin Installer": "Instalador de Plugin", + "Drag your plugin.zip to install it": "Arraste seu plugin.zip para instalá-lo", + "Version Checker": "Verificador de Versão", + "Check which version of Applio is the latest to see if you need to update.": "Verifique qual versão do Applio é a mais recente para ver se você precisa atualizar.", + "Check for updates": "Verificar atualizações", + "Post-Process": "Pós-processamento", + "Post-process the audio to apply effects to the output.": "Pós-processar o áudio para aplicar efeitos na saída.", + "Reverb": "Reverberação", + "Apply reverb to the audio.": "Aplicar reverberação ao áudio.", + "Reverb Room Size": "Tamanho da Sala de Reverberação", + "Set the room size of the reverb.": "Defina o tamanho da sala da reverberação.", + "Reverb Damping": "Amortecimento da Reverberação", + "Set the damping of the reverb.": "Defina o amortecimento da reverberação.", + "Reverb Wet Gain": "Ganho Molhado da Reverberação", + "Set the wet gain of the reverb.": "Defina o ganho molhado da reverberação.", + "Reverb Dry Gain": "Ganho Seco da Reverberação", + "Set the dry gain of the reverb.": "Defina o ganho seco da reverberação.", + "Reverb Width": "Largura da Reverberação", + "Set the width of the reverb.": "Defina a largura da reverberação.", + "Reverb Freeze Mode": "Modo de Congelamento da Reverberação", + "Set the freeze mode of the reverb.": "Defina o modo de congelamento da reverberação.", + "Pitch Shift": "Mudança de Pitch", + "Apply pitch shift to the audio.": "Aplicar mudança de Pitch ao áudio.", + "Pitch Shift Semitones": "Semitons de Mudança de Pitch", + "Set the pitch shift semitones.": "Defina os semitons de mudança de Pitch.", + "Limiter": "Limitador", + "Apply limiter to the audio.": 
"Aplicar limitador ao áudio.", + "Limiter Threshold dB": "Limite do Limitador em dB", + "Set the limiter threshold dB.": "Defina o limite do limitador em dB.", + "Limiter Release Time": "Tempo de Liberação do Limitador", + "Set the limiter release time.": "Defina o tempo de liberação do limitador.", + "Gain": "Ganho", + "Apply gain to the audio.": "Aplicar ganho ao áudio.", + "Gain dB": "Ganho dB", + "Set the gain dB.": "Defina o ganho em dB.", + "Distortion": "Distorção", + "Apply distortion to the audio.": "Aplicar distorção ao áudio.", + "Distortion Gain": "Ganho de Distorção", + "Set the distortion gain.": "Defina o ganho de distorção.", + "Chorus": "Chorus", + "Apply chorus to the audio.": "Aplicar Chorus ao áudio.", + "Chorus Rate Hz": "Taxa de Chorus Hz", + "Set the chorus rate Hz.": "Defina a taxa de Chorus em Hz.", + "Chorus Depth": "Profundidade do Chorus", + "Set the chorus depth.": "Defina a profundidade do Chorus.", + "Chorus Center Delay ms": "Delay Central do Chorus ms", + "Set the chorus center delay ms.": "Defina o Delay central do Chorus em ms.", + "Chorus Feedback": "Feedback do Chorus", + "Set the chorus feedback.": "Defina o feedback do Chorus.", + "Chorus Mix": "Mix do Chorus", + "Set the chorus mix.": "Defina a Mix do Chorus.", + "Bitcrush": "Bitcrush", + "Apply bitcrush to the audio.": "Aplicar bitcrush ao áudio.", + "Bitcrush Bit Depth": "Profundidade de Bits do Bitcrush", + "Set the bitcrush bit depth.": "Defina a profundidade de bits do bitcrush.", + "Clipping": "Clipping", + "Apply clipping to the audio.": "Aplicar clipping ao áudio.", + "Clipping Threshold": "Limite de Clipping", + "Set the clipping threshold.": "Defina o limite de clipping.", + "Compressor": "Compressor", + "Apply compressor to the audio.": "Aplicar compressor ao áudio.", + "Compressor Threshold dB": "Limite do Compressor em dB", + "Set the compressor threshold dB.": "Defina o limite do compressor em dB.", + "Compressor Ratio": "Taxa do Compressor", + "Set the 
compressor ratio.": "Defina a taxa do compressor.", + "Compressor Attack ms": "Ataque do Compressor ms", + "Set the compressor attack ms.": "Defina o ataque do compressor em ms.", + "Compressor Release ms": "Liberação do Compressor ms", + "Set the compressor release ms.": "Defina a liberação do compressor em ms.", + "Delay": "Delay", + "Apply delay to the audio.": "Aplicar Delay ao áudio.", + "Delay Seconds": "Segundos de Delay", + "Set the delay seconds.": "Defina os segundos de Delay.", + "Delay Feedback": "Feedback do Delay", + "Set the delay feedback.": "Defina o feedback do Delay.", + "Delay Mix": "Mix do Delay", + "Set the delay mix.": "Defina a Mix do Delay.", + "Custom Embedder": "Embedder Personalizado", + "Select Custom Embedder": "Selecionar Embedder Personalizado", + "Refresh embedders": "Atualizar Embedderes", + "Folder Name": "Nome da Pasta", + "Upload .bin": "Carregar .bin", + "Upload .json": "Carregar .json", + "Move files to custom embedder folder": "Mover arquivos para a pasta de Embedder personalizado", + "model information": "informações do modelo", + "Model Creator": "Criador do Modelo", + "Name of the model creator. (Default: Unknown)": "Nome do criador do modelo. 
(Padrão: Unknown)" } diff --git a/assets/i18n/languages/pt_PT.json b/assets/i18n/languages/pt_PT.json index 92250725aad4e12c1c5355bbbf55c24a64b68b15..52cd7b8a38c66c3378ce98aebad927984d8d012d 100644 --- a/assets/i18n/languages/pt_PT.json +++ b/assets/i18n/languages/pt_PT.json @@ -1,5 +1,5 @@ { - "Ultimate voice cloning tool, meticulously optimized for unrivaled power, modularity, and user-friendly experience.": "A melhor ferramenta de clonagem de voz, meticulosamente otimizada para potência, modularidade e experiência de fácil utilização incomparáveis.", + "VITS-based Voice Conversion focused on simplicity, quality and performance.": "Conversão de voz baseada em VITS, focada na simplicidade, qualidade e desempenho.", "This section contains some extra utilities that often may be in experimental phases.": "Esta seção contém alguns utilitários extras que muitas vezes podem estar em fases experimentais.", "Output Information": "Informações de saída", "The output information will be displayed here.": "As informações de saída serão exibidas aqui.", @@ -30,8 +30,8 @@ "Enter dataset path": "Inserir caminho do conjunto de dados", "Sampling Rate": "Taxa de amostragem", "The sampling rate of the audio files.": "A taxa de amostragem dos arquivos de áudio.", - "RVC Version": "Versão RVC", - "The RVC version of the model.": "A versão RVC do modelo.", + "Model Architecture": "Arquitetura do modelo", + "Version of the model architecture.": "Versão da arquitetura do modelo.", "Preprocess Dataset": "Pré-processar conjunto de dados", "Extract": "Extrair", "Hop Length": "Comprimento do salto", diff --git a/assets/i18n/languages/ro_RO.json b/assets/i18n/languages/ro_RO.json index 76438c76d08152a2952c22e6e92178ee6dfa69b7..990d46f34bc39c723b2535fb4de40ce58dd98b59 100644 --- a/assets/i18n/languages/ro_RO.json +++ b/assets/i18n/languages/ro_RO.json @@ -1,5 +1,5 @@ { - "Ultimate voice cloning tool, meticulously optimized for unrivaled power,
modularity, and user-friendly experience.": "Cel mai bun instrument de clonare a vocii, optimizat meticulos pentru putere, modularitate și experiență ușor de utilizat.", + "VITS-based Voice Conversion focused on simplicity, quality and performance.": "Cel mai bun instrument de clonare a vocii, optimizat meticulos pentru putere, modularitate și experiență ușor de utilizat.", "This section contains some extra utilities that often may be in experimental phases.": "Această secțiune conține câteva utilități suplimentare care pot fi adesea în faze experimentale.", "Output Information": "Informații despre ieșire", "The output information will be displayed here.": "Informațiile de ieșire vor fi afișate aici.", @@ -30,8 +30,8 @@ "Enter dataset path": "Introduceți calea setului de date", "Sampling Rate": "Rata de eșantionare", "The sampling rate of the audio files.": "Rata de eșantionare a fișierelor audio.", - "RVC Version": "Versiunea RVC", - "The RVC version of the model.": "Versiunea RVC a modelului.", + "Model Architecture": "Versiunea RVC", + "Version of the model architecture.": "Versiunea RVC a modelului.", "Preprocess Dataset": "Set de date preproces", "Extract": "Extract", "Hop Length": "Lungimea hameiului", diff --git a/assets/i18n/languages/ru_RU.json b/assets/i18n/languages/ru_RU.json index b7cd9a73f7e6c55f74a2d551654fef4d8cad4880..ba4122330c48c70091590dc358dce42668dbf1da 100644 --- a/assets/i18n/languages/ru_RU.json +++ b/assets/i18n/languages/ru_RU.json @@ -1,120 +1,120 @@ { - "Ultimate voice cloning tool, meticulously optimized for unrivaled power, modularity, and user-friendly experience.": "Идеальный инструмент для клонирования голоса, тщательно оптимизированный для непревзойденной мощности, модульности и удобства использования.", - "This section contains some extra utilities that often may be in experimental phases.": "Этот раздел содержит некоторые дополнительные утилиты, которые часто находятся на экспериментальных стадиях.", + "VITS-based Voice 
Conversion focused on simplicity, quality and performance.": "Конвертация голоса на основе VITS, ориентированная на простоту, качество и производительность.", + "This section contains some extra utilities that often may be in experimental phases.": "Этот раздел содержит дополнительные утилиты, которые часто могут находиться на экспериментальных стадиях.", "Output Information": "Выходная информация", "The output information will be displayed here.": "Здесь будет отображена выходная информация.", "Inference": "Вывод", - "Train": "Поезд", - "Extra": "Дополнительный", - "Merge Audios": "Слияние аудиозаписей", + "Train": "Обучение", + "Extra": "Дополнительно", + "Merge Audios": "Слияние аудио", "Processing": "Обработка", - "Audio Analyzer": "Анализатор звука", + "Audio Analyzer": "Анализатор аудио", "Model Information": "Информация о модели", "Plugins": "Плагины", - "Download": "Загружать", + "Download": "Скачать", "Report a Bug": "Сообщить об ошибке", - "Settings": "Параметры", - "Preprocess": "Предварительной обработки", + "Settings": "Настройки", + "Preprocess": "Предварительная обработка", "Model Name": "Название модели", "Name of the new model.": "Название новой модели.", "Enter model name": "Введите название модели", "Dataset Path": "Путь к набору данных", - "Path to the dataset folder.": "Путь к папке набора данных.", - "Refresh Datasets": "Обновление наборов данных", + "Path to the dataset folder.": "Путь к папке с набором данных.", + "Refresh Datasets": "Обновить наборы данных", "Dataset Creator": "Создатель набора данных", - "Dataset Name": "Имя набора данных", - "Name of the new dataset.": "Имя нового набора данных.", - "Enter dataset name": "Введите имя набора данных", - "Upload Audio Dataset": "Загрузка набора аудиоданных", + "Dataset Name": "Название набора данных", + "Name of the new dataset.": "Название нового набора данных.", + "Enter dataset name": "Введите название набора данных", + "Upload Audio Dataset": "Загрузить набор аудиоданных", "The audio 
file has been successfully added to the dataset. Please click the preprocess button.": "Аудиофайл успешно добавлен в набор данных. Пожалуйста, нажмите кнопку предварительной обработки.", "Enter dataset path": "Введите путь к набору данных", "Sampling Rate": "Частота дискретизации", "The sampling rate of the audio files.": "Частота дискретизации аудиофайлов.", - "RVC Version": "Версия РВК", - "The RVC version of the model.": "Версия модели РВК.", - "Preprocess Dataset": "Набор данных предварительной обработки", - "Extract": "Экстракт", - "Hop Length": "Длина хмеля", - "Denotes the duration it takes for the system to transition to a significant pitch change. Smaller hop lengths require more time for inference but tend to yield higher pitch accuracy.": "Обозначает время, необходимое системе для перехода к значительному изменению высоты тона. Меньшая длина скачка требует больше времени для логического вывода, но, как правило, обеспечивает более высокую точность шага.", - "Batch Size": "Размер партии", - "It's advisable to align it with the available VRAM of your GPU. A setting of 4 offers improved accuracy but slower processing, while 8 provides faster and standard results.": "Рекомендуется выровнять его с доступной видеопамятью вашего графического процессора. Значение 4 обеспечивает повышенную точность, но более медленную обработку, в то время как значение 8 обеспечивает более быстрые и стандартные результаты.", - "Save Every Epoch": "Сохраняйте каждую эпоху", - "Determine at how many epochs the model will saved at.": "Определите, в скольких эпохах будет сохраняться модель.", - "Total Epoch": "Общая эпоха", + "Model Architecture": "Версия RVC", + "Version of the model architecture.": "Версия RVC модели.", + "Preprocess Dataset": "Предварительная обработка набора данных", + "Extract": "Извлечь", + "Hop Length": "Длина скачка", + "Denotes the duration it takes for the system to transition to a significant pitch change. 
Smaller hop lengths require more time for inference but tend to yield higher pitch accuracy.": "Обозначает время, необходимое системе для перехода к значительному изменению высоты тона. Меньшая длина скачка требует больше времени для вывода, но обычно обеспечивает более высокую точность определения высоты тона.", + "Batch Size": "Размер пакета", + "It's advisable to align it with the available VRAM of your GPU. A setting of 4 offers improved accuracy but slower processing, while 8 provides faster and standard results.": "Рекомендуется выровнять его с доступной видеопамятью вашего графического процессора. Значение 4 обеспечивает повышенную точность, но более медленную обработку, тогда как 8 обеспечивает более быстрые и стандартные результаты.", + "Save Every Epoch": "Сохранять каждую эпоху", + "Determine at how many epochs the model will saved at.": "Определите, на скольких эпохах будет сохраняться модель.", + "Total Epoch": "Общее количество эпох", "Specifies the overall quantity of epochs for the model training process.": "Задает общее количество эпох для процесса обучения модели.", - "Pretrained": "Предварительно обученный", - "Save Only Latest": "Сохранить только последние новости", - "Enabling this setting will result in the G and D files saving only their most recent versions, effectively conserving storage space.": "Включение этого параметра приведет к тому, что файлы G и D будут сохранять только свои самые последние версии, эффективно экономя место на диске.", - "Save Every Weights": "Сохраняйте все веса", + "Pretrained": "Предварительно обученные модели", + "Save Only Latest": "Сохранять только последние версии", + "Enabling this setting will result in the G and D files saving only their most recent versions, effectively conserving storage space.": "Включение этой настройки приведет к тому, что файлы G и D будут сохранять только свои самые последние версии, что позволит эффективно экономить место на диске.", + "Save Every Weights": "Сохранять все веса", 
"This setting enables you to save the weights of the model at the conclusion of each epoch.": "Эта настройка позволяет сохранять весовые коэффициенты модели в конце каждой эпохи.", - "Custom Pretrained": "Пользовательский предварительно обученный", + "Custom Pretrained": "Пользовательские предварительно обученные модели", "Utilizing custom pretrained models can lead to superior results, as selecting the most suitable pretrained models tailored to the specific use case can significantly enhance performance.": "Использование пользовательских предварительно обученных моделей может привести к превосходным результатам, так как выбор наиболее подходящих предварительно обученных моделей, адаптированных к конкретному сценарию использования, может значительно повысить производительность.", - "Upload Pretrained Model": "Отправка предварительно обученной модели", - "Refresh Custom Pretraineds": "Обновление пользовательских предварительно обученных объектов", - "Pretrained Custom Settings": "Предварительно обученные пользовательские параметры", - "The file you dropped is not a valid pretrained file. Please try again.": "Файл, который вы удалили, не является допустимым предварительно обученным файлом. 
Повторите попытку.", - "Click the refresh button to see the pretrained file in the dropdown menu.": "Нажмите кнопку обновления, чтобы увидеть предварительно обученный файл в раскрывающемся меню.", - "Pretrained G Path": "Пользовательский предварительно обученный G", - "Pretrained D Path": "Пользовательский предварительно обученный D", - "GPU Settings": "Настройки графического процессора", - "Sets advanced GPU settings, recommended for users with better GPU architecture.": "Устанавливает расширенные настройки графического процессора, рекомендуемые для пользователей с улучшенной архитектурой графического процессора.", - "GPU Custom Settings": "Пользовательские настройки графического процессора", - "GPU Number": "Номер графического процессора", - "0 to ∞ separated by -": "от 0 до ∞ разделенных -", - "GPU Information": "Информация о графическом процессоре", - "Pitch Guidance": "Руководство по питчу", + "Upload Pretrained Model": "Загрузить предварительно обученную модель", + "Refresh Custom Pretraineds": "Обновить пользовательские предварительно обученные модели", + "Pretrained Custom Settings": "Пользовательские настройки предварительно обученных моделей", + "The file you dropped is not a valid pretrained file. Please try again.": "Файл, который вы загрузили, не является допустимым предварительно обученным файлом. 
Пожалуйста, попробуйте снова.", + "Click the refresh button to see the pretrained file in the dropdown menu.": "Нажмите кнопку обновления, чтобы увидеть предварительно обученный файл в выпадающем меню.", + "Pretrained G Path": "Путь к пользовательскому предварительно обученному G", + "Pretrained D Path": "Путь к пользовательскому предварительно обученному D", + "GPU Settings": "Настройки GPU", + "Sets advanced GPU settings, recommended for users with better GPU architecture.": "Устанавливает расширенные настройки GPU, рекомендуемые для пользователей с улучшенной архитектурой GPU.", + "GPU Custom Settings": "Пользовательские настройки GPU", + "GPU Number": "Номер GPU", + "0 to ∞ separated by -": "от 0 до ∞, разделенные -", + "GPU Information": "Информация о GPU", + "Pitch Guidance": "Руководство по высоте тона", "By employing pitch guidance, it becomes feasible to mirror the intonation of the original voice, including its pitch. This feature is particularly valuable for singing and other scenarios where preserving the original melody or pitch pattern is essential.": "Используя управление высотой тона, становится возможным отразить интонацию исходного голоса, включая его высоту. Эта функция особенно ценна для пения и других сценариев, где важно сохранить оригинальную мелодию или тональность.", "Utilize pretrained models when training your own. This approach reduces training duration and enhances overall quality.": "Используйте предварительно обученные модели при обучении своих собственных. 
Такой подход сокращает продолжительность обучения и повышает общее качество.", - "Extract Features": "Извлечение объектов", + "Extract Features": "Извлечь функции", "Start Training": "Начать обучение", "Generate Index": "Сгенерировать индекс", "Voice Model": "Голосовая модель", "Select the voice model to use for the conversion.": "Выберите голосовую модель, которая будет использоваться для преобразования.", "Index File": "Индексный файл", "Select the index file to use for the conversion.": "Выберите индексный файл, который будет использоваться для преобразования.", - "Refresh": "Освежать", + "Refresh": "Обновить", "Unload Voice": "Выгрузить голос", - "Single": "Единственный", + "Single": "Одиночный", "Upload Audio": "Загрузить аудио", - "Select Audio": "Выберите Аудио", + "Select Audio": "Выберите аудио", "Select the audio to convert.": "Выберите аудио для преобразования.", "Advanced Settings": "Расширенные настройки", - "Clear Outputs (Deletes all audios in assets/audios)": "Clear Outputs (Удаляет все аудиозаписи в assets/audios)", + "Clear Outputs (Deletes all audios in assets/audios)": "Очистить выходные данные (удаляет все аудио в assets/audios)", "Custom Output Path": "Пользовательский выходной путь", "Output Path": "Выходной путь", "The path where the output audio will be saved, by default in assets/audios/output.wav": "Путь, по которому будет сохранен выходной звук, по умолчанию в assets/audios/output.wav", - "Split Audio": "Разделенное аудио", + "Split Audio": "Разделить аудио", "Split the audio into chunks for inference to obtain better results in some cases.": "Разделите аудио на фрагменты для вывода, чтобы получить лучшие результаты в некоторых случаях.", "Autotune": "Автотюн", - "Apply a soft autotune to your inferences, recommended for singing conversions.": "Примените мягкую автонастройку к своим выводам, рекомендуемую для певческих преобразований.", - "Clean Audio": "Чистый звук", - "Clean your audio output using noise detection algorithms, 
recommended for speaking audios.": "Очистите аудиовыход с помощью алгоритмов обнаружения шума, рекомендуемых для проговаривания аудио.", - "Clean Strength": "Чистая прочность", + "Apply a soft autotune to your inferences, recommended for singing conversions.": "Примените мягкую автонастройку к вашим выводам, рекомендуемую для певческих преобразований.", + "Clean Audio": "Очистить аудио", + "Clean your audio output using noise detection algorithms, recommended for speaking audios.": "Очистите аудиовыход с помощью алгоритмов обнаружения шума, рекомендуемых для разговорных аудио.", + "Clean Strength": "Сила очистки", "Set the clean-up level to the audio you want, the more you increase it the more it will clean up, but it is possible that the audio will be more compressed.": "Установите желаемый уровень очистки звука, чем больше вы его увеличите, тем больше он будет очищаться, но возможно, что звук будет более сжатым.", - "Pitch": "Смола", + "Pitch": "Высота тона", "Set the pitch of the audio, the higher the value, the higher the pitch.": "Установите высоту звука, чем выше значение, тем выше высота тона.", "Filter Radius": "Радиус фильтра", "If the number is greater than or equal to three, employing median filtering on the collected tone results has the potential to decrease respiration.": "Если это число больше или равно трем, использование медианной фильтрации по собранным результатам тона может привести к снижению дыхания.", "Search Feature Ratio": "Соотношение объектов поиска", "Influence exerted by the index file; a higher value corresponds to greater influence. However, opting for lower values can help mitigate artifacts present in the audio.": "Влияние, оказываемое индексным файлом; Чем выше значение, тем больше влияние. Однако выбор более низких значений может помочь смягчить артефакты, присутствующие в аудио.", - "Volume Envelope": "Огибающая объема", + "Volume Envelope": "Огибающая громкости", "Substitute or blend with the volume envelope of the output. 
The closer the ratio is to 1, the more the output envelope is employed.": "Замените или смешайте с огибающей громкости выхода. Чем ближе отношение к 1, тем больше используется выходная огибающая.", "Protect Voiceless Consonants": "Защита глухих согласных", "Safeguard distinct consonants and breathing sounds to prevent electro-acoustic tearing and other artifacts. Pulling the parameter to its maximum value of 0.5 offers comprehensive protection. However, reducing this value might decrease the extent of protection while potentially mitigating the indexing effect.": "Защитите отчетливые согласные и звуки дыхания, чтобы предотвратить электроакустические разрывы и другие артефакты. Извлечение параметра до максимального значения 0,5 обеспечивает комплексную защиту. Однако уменьшение этого значения может снизить степень защиты, потенциально смягчив эффект индексирования.", "Pitch extraction algorithm": "Алгоритм извлечения высоты тона", "Pitch extraction algorithm to use for the audio conversion. The default algorithm is rmvpe, which is recommended for most cases.": "Алгоритм извлечения высоты тона, используемый для преобразования звука. 
По умолчанию используется алгоритм rmvpe, который рекомендуется для большинства случаев.", - "Convert": "Обращать", - "Export Audio": "Экспорт аудио", - "Batch": "Партия", + "Convert": "Преобразовать", + "Export Audio": "Экспортировать аудио", + "Batch": "Пакет", "Input Folder": "Входная папка", "Select the folder containing the audios to convert.": "Выберите папку, содержащую аудиофайлы для преобразования.", "Enter input path": "Введите путь ввода", "Output Folder": "Выходная папка", "Select the folder where the output audios will be saved.": "Выберите папку, в которой будут сохранены выходные аудиозаписи.", "Enter output path": "Введите выходной путь", - "Get information about the audio": "Получение информации об аудио", + "Get information about the audio": "Получить информацию об аудио", "Information about the audio file": "Информация об аудиофайле", - "Waiting for information...": "Жду информации...", + "Waiting for information...": "Ожидание информации...", "## Voice Blender": "## Голосовой блендер", "Select two voice models, set your desired blend percentage, and blend them into an entirely new voice.": "Выберите две модели голоса, установите желаемый процент смешивания и смешайте их в совершенно новый голос.", "Voice Blender": "Голосовой блендер", @@ -129,12 +129,12 @@ "Inroduce the model information": "Ввод информации о модели", "The information to be placed in the model (You can leave it blank or put anything).": "Информация, которая будет размещена в модели (Вы можете оставить ее пустой или поставить что угодно).", "View model information": "Просмотр информации о модели", - "Introduce the model pth path": "Знакомство с моделью pth-пути", - "View": "Вид", + "Introduce the model pth path": "Введите путь к pth модели", + "View": "Просмотр", "Model extraction": "Извлечение модели", "Model conversion": "Преобразование модели", - "Pth file": "P-й файл", - "Output of the pth file": "Вывод p-го файла", + "Pth file": "Pth файл", + "Output of the pth file": "Вывод 
pth файла", "# How to Report an Issue on GitHub": "# Как сообщить о проблеме на GitHub", "1. Click on the 'Record Screen' button below to start recording the issue you are experiencing.": "1. Нажмите кнопку «Записать экран» ниже, чтобы начать запись проблемы, с которой вы столкнулись.", "2. Once you have finished recording the issue, click on the 'Stop Recording' button (the same button, but the label changes depending on whether you are actively recording or not).": "2. После того, как вы закончили запись задачи, нажмите кнопку «Остановить запись» (та же кнопка, но метка меняется в зависимости от того, ведете ли вы активную запись или нет).", @@ -143,22 +143,22 @@ "Record Screen": "Запись экрана", "Record": "Запись", "Stop Recording": "Остановить запись", - "Introduce the model .pth path": "Знакомство с моделью .pth-пути", + "Introduce the model .pth path": "Введите путь к .pth модели", "See Model Information": "Посмотреть информацию о модели", "## Download Model": "## Скачать модель", "Model Link": "Ссылка на модель", - "Introduce the model link": "Введение ссылки на модель", + "Introduce the model link": "Введите ссылку на модель", "Download Model": "Скачать модель", "## Drop files": "## Удаление файлов", "Drag your .pth file and .index file into this space. Drag one and then the other.": "Перетащите файлы .pth и .index в это пространство. 
Перетащите один, а затем другой.", "TTS Voices": "Голоса TTS", "Select the TTS voice to use for the conversion.": "Выберите голос TTS, который будет использоваться для преобразования.", - "Text to Synthesize": "Синтезируемый текст", + "Text to Synthesize": "Текст для синтеза", "Enter the text to synthesize.": "Введите текст для синтеза.", "Or you can upload a .txt file": "Или вы можете загрузить .txt файл", "Enter text to synthesize": "Введите текст для синтеза", - "Output Path for TTS Audio": "Выходной тракт для TTS Audio", - "Output Path for RVC Audio": "Выходной тракт для RVC Audio", + "Output Path for TTS Audio": "Выходной путь для TTS-аудио", + "Output Path for RVC Audio": "Выходной путь для RVC-аудио", "Enable Applio integration with Discord presence": "Включите интеграцию Applio с присутствием в Discord", "It will activate the possibility of displaying the current Applio activity in Discord.": "Это активирует возможность отображения текущей активности Applio в Discord.", "Enable Applio integration with applio.org/models using flask": "Включите интеграцию Applio с applio.org/models с помощью flask", @@ -171,5 +171,5 @@ "Drag your plugin.zip to install it": "Перетащите plugin.zip, чтобы установить его", "Version Checker": "Проверка версий", "Check which version of Applio is the latest to see if you need to update.": "Проверьте, какая версия Applio является последней, чтобы узнать, нужно ли вам обновление.", - "Check for updates": "Проверьте наличие обновлений" -} \ No newline at end of file + "Check for updates": "Проверка наличия обновлений" +} diff --git a/assets/i18n/languages/ta-IN.json b/assets/i18n/languages/ta-IN.json index 9da9bdc4320a486a202ddb515a9a680684440f44..f73e7496966057c549b01b0546af5194672c1a7b 100644 --- a/assets/i18n/languages/ta-IN.json +++ b/assets/i18n/languages/ta-IN.json @@ -1,204 +1,204 @@ -{ - "Ultimate voice cloning tool, meticulously optimized for unrivaled power, modularity, and user-friendly experience.": "முழுமையான குரல் 
குளோனிங் கருவி, அநாகரமாக ஒருமிக்க, பகுக்காதது, பயனர் உருவாக்கத்திற்கு உயரியது.\n[தமிழ் மொழிபெயர்ப்பு: Enes](https://discord.com/users/1140031358006202468)", - "This section contains some extra utilities that often may be in experimental phases.": "இந்த பிரிவில் சேர்ந்துள்ள கொடுமை கருவிகளில் சார்ந்த பல கூட்டுத்தரங்களைக் கொண்டுள்ளது.", - "Output Information": "வெளியீடு தகவல்", - "The output information will be displayed here.": "வெளியீடு தகவல் இங்கே காட்டப்படும்.", - "Inference": "கருத்து", - "Train": "பயிற்சி", - "Extra": "கூடுதல்", - "Merge Audios": "ஒரேபோனில் ஒன்றாக்குக", - "Processing": "செயலாக்கம்", - "Audio Analyzer": "ஆடியோ பகுப்பாய்வாளர்", - "Model Information": "மாதிரி தகவல்", - "Plugins": "பிளகின்கள்", - "Download": "பதிவிறக்கம்", - "Report a Bug": "பிழை அறிக்கை", - "Settings": "அமைப்புகள்", - "Preprocess": "முன்பாகவும்", - "Model Name": "மாதிரி பெயர்", - "Name of the new model.": "புதிய மாதிரி பெயர்.", - "Enter model name": "மாதிரி பெயரை உள்ளிடவும்", - "Dataset Path": "தரவுத்தொகுதி பாதை", - "Path to the dataset folder.": "தரவுத்தொகுதி கோப்புக்கு பாதை.", - "Refresh Datasets": "தரவுத்தொகுதிகளை புதுப்பிக்கவும்", - "Dataset Creator": "தரவுத்தொகுதி உருவாக்கி", - "Dataset Name": "தரவுத்தொகுதி பெயர்", - "Name of the new dataset.": "புதிய தரவுத்தொகுதி பெயர்.", - "Enter dataset name": "தரவுத்தொகுதி பெயரை உள்ளிடவும்", - "Upload Audio Dataset": "ஆடியோ தரவுத்தொகுதியை பதிவேற்றவும்", - "The audio file has been successfully added to the dataset. Please click the preprocess button.": "ஆடியோ கோப்பு தரவுத்தொகுதிக்கு வெற்றிகரமாக சேர்க்கப்பட்டுள்ளது. 
தயவுசெய்து முன்னிருப்பை அழுத்தவும்.", - "Enter dataset path": "தரவுத்தொகுதி பாதையை உள்ளிடவும்", - "Sampling Rate": "மாதிரி விகிதம்", - "The sampling rate of the audio files.": "ஆடியோ கோப்புகளின் மாதிரி விகிதம்.", - "RVC Version": "RVC பதிப்பு", - "The RVC version of the model.": "மாதிரி RVC பதிப்பு.", - "Preprocess Dataset": "முன்பாகவும் தரவுத்தொகுதி", - - "Embedder Model": "உள்ளீடு மாதிரி", - "Model used for learning speaker embedding.": "பேச்சாளர் உள்ளீட்டை கற்க பயன்படுத்தப்படும் மாதிரி.", - "Extract": "எக்ஸ்ட்ராக்ட்", - "Hop Length": "ஹாப் நீளம்", - "Denotes the duration it takes for the system to transition to a significant pitch change. Smaller hop lengths require more time for inference but tend to yield higher pitch accuracy.": "கருத்துக்கு எவ்வளவு நேரம் எடுத்துக் கொள்கிறது என்றால், அது ஒரு முக்கிய பிச் மாற்றத்திற்கு அமைந்துகொள்கின்றது. சிறிய ஹாப் நீளங்களுக்கு அதிக நேரம் தேவைப்படுகின்றது ஆனால் அவை உயரமான பிச் சரிசெய்தியை உருவாக்க உதவுகின்றன.", - "Batch Size": "பேட்ச் அளவு", - "It's advisable to align it with the available VRAM of your GPU. A setting of 4 offers improved accuracy but slower processing, while 8 provides faster and standard results.": "உங்கள் GPU கிடைக்கும் கிடைச்சதை அவசியமாக உள்ளிட பரிந்திருக்கின்றது. 
4 என்ற அமைப்பு உயர்த்தப்பட்ட உறுதியுள்ள சொல்லத்தைக் கொண்டுள்ளது ஆனால் அதிக நேரம் பயன்படுகின்றது, 8 அமைப்பு விரைவாக மற்றும் நிலைக்குப் பொருத்தப்படுகிறது.", - "Save Every Epoch": "ஒவ்வொரு காலமும் சேமிக்கவும்", - "Determine at how many epochs the model will saved at.": "மாதிரி எதிர்காலங்களில் எத்தனை படிப்புகளில் மாதிரியைச் சேமிக்க விரும்புகிறீர்கள்.", - "Total Epoch": "மொத்த எபக்", - "Specifies the overall quantity of epochs for the model training process.": "மாதிரி பயிற்சி செய்திகளின் மொத்த அளவை குறிப்பிடுகிறது.", - "Pretrained": "பூர்வதயாரிக", - "Save Only Latest": "கடைசியே சேமிக்கவும்", - "Enabling this setting will result in the G and D files saving only their most recent versions, effectively conserving storage space.": "இந்த அமைப்பை இயக்கும் போது G மற்றும் D கோப்புகள் உங்கள் கடைசி பதிப்புகளைச் சேமிக்கும், வாயிலாக சேமிக்கப்படுகின்றன.", - "Save Every Weights": "ஒவ்வொரு எடைக்கும் சேமிக்கவும்", - "This setting enables you to save the weights of the model at the conclusion of each epoch.": "இந்த அமைப்பு உங்கள் மாதிரி பயிற்சி செய்தியின் முடிவில் மாதிரிகளை சேமிக்க அனுமதிக்கின்றது.", - "Custom Pretrained": "கஸ்டம் பூர்வதயாரிக", - "Utilizing custom pretrained models can lead to superior results, as selecting the most suitable pretrained models tailored to the specific use case can significantly enhance performance.": "கஸ்டம் பூர்வதயாரிக மாதிரிகளை பயன்படுத்துவது சிறந்த விளக்கங்களை தரலாம், குறிப்பிடுகின்ற குழப்பத்திற்கு ஏற்றதும் பூர்வதயாரிக மாதிரிகளைத் தேர்ந்தெடுக்க உடனே அந்தக் குழப்பத்தை அபூர்வமாக செயல்படுத்தலாம்.", - "Upload Pretrained Model": "பூர்வதயாரிக மாதிரி மோடெலை பதிவேற்றவும்", - "Refresh Custom Pretraineds": "கஸ்டம் பூர்வதயாரிகளை புதுப்பிக்கவும்", - "Pretrained Custom Settings": "கஸ்டம் பூர்வதயாரிக அமைப்புகள்", - "The file you dropped is not a valid pretrained file. Please try again.": "நீங்கள் பொருத்தவில்லை என்றால் பூர்வதயாரிக கோப்பு அல்ல. 
மீண்டும் முயற்சிக்கவும்.", - "Click the refresh button to see the pretrained file in the dropdown menu.": "கீழேயுள்ள பட்டி பட்டியில் பூர்வதயாரிக கோப்புக்கு உருவாக்க முயலுங்கள்.", - "Pretrained G Path": "கஸ்டம் பூர்வதயாரிக G பாதை", - "Pretrained D Path": "கஸ்டம் பூர்வதயாரிக D பாதை", - "GPU Settings": "GPU அமைப்புகள்", - "Sets advanced GPU settings, recommended for users with better GPU architecture.": "மேலும் முதிர்ச்சியான GPU அமைப்புகளை அமைக்கின்றது, உடனடியான GPU கருவிக்கு பரிந்திரமான பயனாளர்களுக்கு பரிந்துரிக்கப்படுகிறது.", - "GPU Custom Settings": "GPU கஸ்டம் அமைப்புகள்", - "GPU Number": "GPU எண்", - "0 to ∞ separated by -": "0 இரு ∞ பிரிவாக - வாக்கப்பட்டு", - "GPU Information": "GPU தகவல்", - "Pitch Guidance": "பிச் வழிநிரப்பல்", - "By employing pitch guidance, it becomes feasible to mirror the intonation of the original voice, including its pitch. This feature is particularly valuable for singing and other scenarios where preserving the original melody or pitch pattern is essential.": "பிச் வழிநிரப்பல் மூலம், மூல குரலின் ஒலிக்கோட்டைக் கண்டுகொள்வது சாத்தியமாகின்றது, அதன் பிச்சை கூட. இந்த அம்சம் குரல் பாடலுக்கும் மற்றும் உலாவிகளுக்கும் மூல இசை அல்லது பிச் முதிரையைக் காப்பாற்ற எளியதாக இருக்கும்.", - "Utilize pretrained models when training your own. 
This approach reduces training duration and enhances overall quality.": "உங்கள் பயிற்சியில் உங்கள் தனிப்பட்ட மாதிரிகளை பயன்படுத்துவது பூர்வதயாரிக மாதிரிகளை பயன்படுத்துவது குரல் பயிற்சி காலத்தை குறைக்கின்றது மற்றும் மொத்த தரவின் உயர்த்துத்தை அதிகரிக்கின்றது.", - "Extract Features": "அம்சங்கள் எடு", - "Overtraining Detector": "அதிகமாக பயிற்சிப்படுத்தும் அறிவுப்பால்", - "Detect overtraining to prevent the model from learning the training data too well and losing the ability to generalize to new data.": "பயிற்சிப்படுத்தும் தரவை மிகவும் நன்றாக அறியும் பாதுகாப்பு மூலம் மாதிரி பயிற்சிப்படுத்துதலை தடுக்க, புதிய தரவுக்கு பொதுவாக பொருத்தமாக மாற்ற அனுமதியை இழக்குகிறது.", - "Overtraining Detector Settings": "அதிக பயிற்சிப்படுத்தும் அறிவுப்பால் அமைப்புகள்", - "Overtraining Threshold": "அதிக பயிற்சிப்படுத்தும் அறிவுப்பால் அதிகம்", - "Set the maximum number of epochs you want your model to stop training if no improvement is detected.": "அதிகமாக பயிற்சிப்படுத்தும் தரவு அறியப்படாதால் உங்கள் மாதிரியின் பயிற்சிப்படுத்தும் மொத்த எண்ணிக்கையை அமைக்கவும்.", - - "Start Training": "பயிற்சி ஆரம்பிக்கவும்", - "Stop Training & Restart Applio": "பயிற்சி நிறுத்து & புதுப்பிக்க Applio", - "Generate Index": "சுருக்கம் உருவாக்கு", - - "Export Model": "ஏற்றுமதி மாதிரி", - "The button 'Upload' is only for google colab: Uploads the exported files to the ApplioExported folder in your Google Drive.": "'பதிவேற்று' பொத்தான்கள் உள்ளீட்டிற்கு மட்டுமே கூகுள் கோலாப் சேமிப்பகத்திற்கு கடைசியாக கூகுள் டிரைவில் உங்கள் ApplioExported கோப்புக்கு ஏற்றுமதிக்கும்.", - "Exported Pth file": "ஏற்றுமதிக்கப்பட்ட Pth கோப்பு", - "Exported Index file": "ஏற்றுமதிக்கப்பட்ட சுட்டி கோப்பு", - "Select the pth file to be exported": "ஏற்றுமதிக்க வேண்டிய pth கோப்பைத் தேர்ந்தெடுக்கவும்", - "Select the index file to be exported": "ஏற்றுமதிக்க வேண்டிய சுட்டி கோப்பைத் தேர்ந்தெடுக்கவும்", - "Upload": "பதிவேற்று", - - "Voice Model": "குரல் மாதிரி", - "Select the voice model to use for the conversion.": "மாற்றத்திற்கு பயன்படுத்த விரும்பும் 
குரல் மாதிரியை தேர்ந்தெடுக்கவும்.", - "Index File": "சுருக்க கோப்பு", - "Select the index file to use for the conversion.": "மாற்றத்திற்கு பயன்படுத்த உள்ள சுருக்க கோப்பை தேர்ந்தெடுக்கவும்.", - "Refresh": "புதுப்பிக்கவும்", - "Unload Voice": "குரல் அமைதி", - "Single": "ஒற்றை", - "Upload Audio": "ஒலியை பதிவேற்று", - "Select Audio": "ஒலியைத் தேர்ந்தெடு", - "Select the audio to convert.": "மாற்றுவதற்கு ஒலியைத் தேர்ந்தெடு.", - "Advanced Settings": "மேம்பாடு அமைப்புகள்", - "Clear Outputs (Deletes all audios in assets/audios)": "வெற்றிகளை அழித்தல் (assets/audios உள்ள அனைத்து ஒலிகளையும் நீக்கும்)", - "Custom Output Path": "கஸ்டம் வெற்றிப் பாதை", - "Output Path": "வெற்றி பாதை", - "The path where the output audio will be saved, by default in assets/audios/output.wav": "வெற்றிகள் உள்ளிடப்பட்ட ஒலியைச் சேமிக்கப்படும் பாதை, பொதுவாக assets/audios/output.wav இல்.", - "Split Audio": "ஒலியை பிரித்தல்", - "Split the audio into chunks for inference to obtain better results in some cases.": "கொலுசுகளாக ஒலியை பிரிக்க, சில நிலைகளில் சிறப்பு விளக்கங்களைப் பெற விரும்புகின்றது.", - "Autotune": "ஆட்டோடியூன்", - "Apply a soft autotune to your inferences, recommended for singing conversions.": "உங்கள் முன்னோடிகளுக்கு ஒரு மென்னுரை ஆட்டோடியூனை பயன்படுத்தவும், பாடல் மாற்றங்களுக்கு பரிந்துரிக்கப்படுகின்றது.", - "Clean Audio": "சுத்தமான ஒலி", - "Clean your audio output using noise detection algorithms, recommended for speaking audios.": "உங்கள் ஒலி வெற்றியை சுத்தமாக்க, பேசும் ஒலிகளுக்கு பரிந்துரிக்கப்படுகின்றது.", - "Clean Strength": "சுத்த வலிமை", - "Set the clean-up level to the audio you want, the more you increase it the more it will clean up, but it is possible that the audio will be more compressed.": "நீங்கள் விரும்பும் ஒலிக்கு சுத்தமாக்க விளக்கு, அதை அதிகரிக்கும்போது அது அதிகரிக்கும், ஆனால் ஒலி குறுகியாக இருக்கலாம்.", - "Pitch": "பிச்", - "Set the pitch of the audio, the higher the value, the higher the pitch.": "ஒலியின் பிச் அமைக்கவும், மதிப்பு உயரானதும் அதிகமாகும்.", - "Filter Radius": 
"குழப்பத்தின் அருகு", - "If the number is greater than or equal to three, employing median filtering on the collected tone results has the potential to decrease respiration.": "எண் மூலம் மூன்று அல்லது அதனை விட அதிகமாக இருந்தால், சேகரித்த இசை முடிவுகளில் இயலுமை குறைவாகும் என்று சொல்லப்படுகின்றது.", - "Search Feature Ratio": "தேடல் அம்ச விகிதம்", - "Influence exerted by the index file; a higher value corresponds to greater influence. However, opting for lower values can help mitigate artifacts present in the audio.": "இடைவரிசு கோப்பின் மூலம் உள்ள பாதியான ஒருவரிடத்திற்கு உருவாகும் அந்தக் கோப்பு; அதிக மதிப்பு அதிக உருவாகும் என்று அர்த்தம். எனவே, குறோக்கின் குறைந்த மதிப்புகளைத் தேர்வுசெய்வதால் ஒலியில் உள்ள கலப்புகளைத் தவிர்க்க உதவலாம்.", - "Volume Envelope": "அளவு என்வெலோப்", - "Substitute or blend with the volume envelope of the output. The closer the ratio is to 1, the more the output envelope is employed.": "வெற்றியின் அளவு என்வெலோப் இல் மாறியது அல்லது இணைந்தது. விளக்கு அந்த விகிதம் 1 க்கு அழைத்திருந்தால், வெற்றியின் அளவு என்வெலோப் பயன்படும்.", - "Protect Voiceless Consonants": "குரலின் இல்லாத ஸ்வரக்களைக் காப்பாற்றவும்", - "Safeguard distinct consonants and breathing sounds to prevent electro-acoustic tearing and other artifacts. Pulling the parameter to its maximum value of 0.5 offers comprehensive protection. However, reducing this value might decrease the extent of protection while potentially mitigating the indexing effect.": "எலக்ட்ரோ-ஒலி கொழுகு மற்றும் பிற கலப்புகளை தடுக்குவதற்கு விரிவான ஸ்வரக்களுக்கு மற்றும் சுவாசத் தானங்களுக்கு பாதுகாக்க. இந்த அளவுக்கு அதிகமாக 0.5 க்கு அழைத்துக் கொள்வது பொருத்தமான பாதுகாப்பை வழங்குகின்றது. ஆனால், இந்த மதிப்பை குறைந்ததாக்கின்றார் என்றால், பாதுகாப்புக்குரிய நிலை குறைந்துவிடப்படலாம் மற்றும் அதுவே இந்தக் குறோக்குனை பரிந்துரிக்கும் என்று எச்சரிக்கை தரகின்றது.", - "Pitch extraction algorithm": "பிச் எக்ஸ்டிரக்ஷன் அளவுத் தொகுப்பு", - "Pitch extraction algorithm to use for the audio conversion. 
The default algorithm is rmvpe, which is recommended for most cases.": "ஒலி மாற்றத்திற்கு பயன்படுத்த வேண்டிய பிச் எக்ஸ்டிரக்ஷன் அளவுத் தொகுப்பு. இயல்பான அளவுத் தொகுப்பு rmvpe ஆகும், இது அதிகமாக பரிந்துரிக்கப்படுகின்றது.", - - "Convert": "மாற்று", - "Export Audio": "ஒலியை ஏற்றுமதி செய்", - - "Batch": "பேட்ச்", - "Input Folder": "உள்ளிடும் கோப்பு", - "Select the folder containing the audios to convert.": "மாற்ற ஒலிகளைக் கொண்ட கோப்புகளைக் கொண்ட கோப்புக்கு தேர்ந்தெடு.", - "Enter input path": "உள்ளிடும் பாதையை உள்ளிடுக", - "Output Folder": "வெற்றி கோப்பு", - "Select the folder where the output audios will be saved.": "வெற்றிகளைச் சேமிக்கப்படும் கோப்புக்கு தேர்ந்தெடு.", - "Enter output path": "வெற்றியின் பாதையை உள்ளிடுக", - - "Get information about the audio": "ஒலியை பற்றிய தகவல் பெறுக", - - "## Voice Blender": "## குரல் பிளெண்டர்", - "Select two voice models, set your desired blend percentage, and blend them into an entirely new voice.": "இரண்டு குரல் மாதிரிகளைத் தேர்வு செய்து, விரும்பிய குரல் சதவீதம் அமைக்கவும், அவைகளை முழுமையாக ஒரு புதிய குரலாக பிளெண்டுகின்றன.", - "Voice Blender": "குரல் பிளெண்டர்", - "Drag and drop your model here": "உங்கள் மாதிரி இங்கே எழுதவும்", - "You can also use a custom path.": "நீங்கள் கஸ்டம் பாதையையும் பயன்படுத்தலாம்.", - "Blend Ratio": "குரல் சதவீதம்", - "Adjusting the position more towards one side or the other will make the model more similar to the first or second.": "ஒரு பக்கத்திற்கு அல்லது மற்றும் மற்றும் அதிகமாக செய்யும் உள்ளீடு இரட்டிப் பார்த்துக் கொள்ளுதல் மாதிரியாகின்றது.", - "Fusion": "ஐக்கியம்", - "Path to Model": "மாதிரிக்கு பாதை", - "Enter path to model": "மாதிரிக்கு பாதையை உள்ளிடுக", - "Model information to be placed": "இருந்து விடப்பட வேண்டிய மாதிரி தகவல்", - "Introduce the model information": "மாதிரி தகவல் அறிமுகம்", - "The information to be placed in the model (You can leave it blank or put anything).": "மாதிரிக்கு வைக்கப்பட வேண்டிய தகவல் (நீங்கள் அந்தச் செயலை விட அந்தச் செய்யாமல் அனைத்ததையும் வைக்கலாம்).", - "View model 
information": "மாதிரி தகவலைக் காண்க", - "Introduce the model pth path": "மாதிரி pth பாதையை உள்ளிடுக", - "View": "காண்க", - "Model extraction": "மாதிரி எடுத்தல்", - "Model conversion": "மாதிரி மாற்றம்", - "Pth file": "Pth கோப்பு", - "Output of the pth file": "Pth கோப்பின் வெளியேற்றம்", - "# How to Report an Issue on GitHub": "# GitHub-ல் ஒரு பிரச்சினையை புகாரளிக்குவது எப்படி", - "1. Click on the 'Record Screen' button below to start recording the issue you are experiencing.": "1. நீங்கள் அந்தப் பிரச்சினையை பரிசோதிக்கும் கீழே 'திரையை பதிகம் செய்யும்' பொத்தானை கிளிக் செய்யவும்.", - "2. Once you have finished recording the issue, click on the 'Stop Recording' button (the same button, but the label changes depending on whether you are actively recording or not).": "2. நீங்கள் அந்தப் பிரச்சினையை பரிசோதித்துக் கொண்டிருக்கின்றீர்கள், அந்தச் செய்யப்படும் பொத்தானை கிளிக் செய்யவும் (இது நீங்கள் சொல்லப்படும் பொத்தான், ஆனால் நீங்கள் எந்தவேணையும் செய்யக்கூடிய நிலையின் போது பொத்தானின் பெயர் மாறுகின்றது).", - "3. Go to [GitHub Issues](https://github.com/IAHispano/Applio/issues) and click on the 'New Issue' button.": "3. [GitHub Issues](https://github.com/IAHispano/Applio/issues) க்கு செல்லவும் மற்றும் 'புதிய பிரச்சினை' பொத்தானை கிளிக் செய்யவும்.", - "4. Complete the provided issue template, ensuring to include details as needed, and utilize the assets section to upload the recorded file from the previous step.": "4. 
வழுதுணர்ந்து, தேவையான விவரங்களைக் கொண்டு விரிவாக பிரச்சினை பதிவேடு செய்து, முந்தைய படித்த கோப்பை பதிவேடுக்கு பயன்படுத்தலாம்.", - "Record Screen": "திரையை பதிகம் செய்க", - "Record": "பதிகம் செய்க", - "Stop Recording": "பதிகம் நிறுத்துக", - "Introduce the model .pth path": "மாதிரி .pth பாதையை உள்ளிடுக", - "See Model Information": "மாதிரி தகவலைக் காண்க", - "## Download Model": "## மாதிரி பதிவிறக்கம்", - "Model Link": "மாதிரி இணைப்பு", - "Introduce the model link": "மாதிரி இணைப்பை உள்ளிடுக", - "Download Model": "மாதிரி பதிவிறக்கம்", - "## Drop files": "## கோப்புகளை விழுக", - "Drag your .pth file and .index file into this space. Drag one and then the other.": "உங்கள் .pth கோப்பு மற்றும் .index கோப்பை இந்த இடத்திற்கு எழுதுங்கள். ஒருவருக்கு பிறகு ஒருவருக்கு எழுதுங்கள்.", - "## Search Model": "## மாதிரி தேடு", - "Search": "தேடு", - "Introduce the model name to search.": "தேடுவதற்கு மாதிரிப் பெயரை அறிமுகப்படுத்து.", - "We couldn't find models by that name.": "அந்த பெயரில் மாதிரிகளைக் கண்டுபிடிக்க முடியவில்லை.", - - "TTS Voices": "TTS குரல்கள்", - "Select the TTS voice to use for the conversion.": "மாற்றத்திற்கு பயன்படுத்த உள்ள TTS குரலை தேர்ந்தெடுக்கவும்.", - "Text to Synthesize": "சிந்தனை செய்ய உள்ள உரை", - "Enter the text to synthesize.": "சிந்தனை செய்ய உள்ள உரையை உள்ளிடுக.", - "Or you can upload a .txt file": "அல்லது .txt கோப்பை பதிவேற்றலாம்", - "Enter text to synthesize": "சிந்தனை செய்ய உள்ள உரையை உள்ளிடுக", - "Output Path for TTS Audio": "TTS குரலுக்கான வெளியேற்ற பாதை", - "Output Path for RVC Audio": "RVC குரலுக்கான வெளியேற்ற பாதை", - "Enable Applio integration with Discord presence": "Discord உள்ளிட்டது உள்ளிடத்துடன் Applio ஒருவருக்கு இயங்குதல் இயல்புநிலை செய்தியை இயக்குங்கள்", - "It will activate the possibility of displaying the current Applio activity in Discord.": "இது Applio செயல்திறனை Discord-ல் காண்பிக்க முடியும்.", - "Enable Applio integration with applio.org/models using flask": "flask ஐப் பயன்படுத்தி applio.org/models உடன் Applio ஒருவருக்கு இயங்குதல் 
இயல்புநிலை செய்தியை இயக்குங்கள்", - "It will activate the possibility of downloading models with a click from the website.": "இது இணையத்திலிருந்து ஒரு கிளிக்குட்டுடன் மாதிரிகளை பதிவிறக்க முடியும்.", - "Enable fake GPU": "கப்பூ ஜி.பி.யூ ஐ இயக்கு", - "Training is currently unsupported due to the absence of a GPU. To activate the training tab, navigate to the settings tab and enable the 'Fake GPU' option.": "ஒரு ஜி.பி.யூ இல்லாமல் தற்போது பயிற்சிப்படுத்தல் ஆதரிக்கப்படவில்லை. பயிற்சிக்கு பட்டி செயல்முறையை செயலாக்க, அமைப்புகள் பட்டியலில் செல்ல, 'பெயர் ஜி.பி.யூ' விருப்பத்தை இயக்கவும்.", - "Activates the train tab. However, please note that this device lacks GPU capabilities, hence training is not supported. This option is only for testing purposes. (This option will restart Applio)": "பயிற்சிக்கு பட்டி செயலாக்கிறது. எனவே, இந்த சாதனம் ஜி.பி.யூ திறன் இல்லையாம், அதனால் பயிற்சி ஆதரிக்கப்படவில்லை. இந்த விருப்பம் மட்டுமே சோதனை காரணங்களுக்காக உள்ளது. (இந்த விருப்பம் Applio ஐ மீண்டும் திரும்பியிருப்பதற்காக)", - "Theme": "தீமா", - "Select the theme you want to use. (Requires restarting Applio)": "நீங்கள் பயன்படுத்த விரும்பும் தீமையை தேர்ந்தெடுக்கவும். (Applio-ஐ மீளவே போகும்)", - "Language": "மொழி", - "Select the language you want to use. (Requires restarting Applio)": "நீங்கள் பயன்படுத்த விரும்பும் மொழியை தேர்ந்தெடுக்கவும். 
(Applio-ஐ மீளவே போகும்)", - "Plugin Installer": "பிளகின் நிறுவி", - "Drag your plugin.zip to install it": "உங்கள் plugin.zip கோப்பை இதுக்கு இழுக்கவும் அதை நிறுவுக", - "Version Checker": "பதிப்பு சரிபார்க்கல்", - "Check which version of Applio is the latest to see if you need to update.": "நீங்கள் புதியதாகப் புதுப்பிக்க வேண்டும் என்பதை பார்க்க, Applio இன் எந்த பதிப்பு சரிபார்க்கவும்.", - "Check for updates": "புதுப்பிக்கவும்" +{ + "VITS-based Voice Conversion focused on simplicity, quality and performance.": "முழுமையான குரல் குளோனிங் கருவி, அநாகரமாக ஒருமிக்க, பகுக்காதது, பயனர் உருவாக்கத்திற்கு உயரியது.\n[தமிழ் மொழிபெயர்ப்பு: Enes](https://discord.com/users/1140031358006202468)", + "This section contains some extra utilities that often may be in experimental phases.": "இந்த பிரிவில் சேர்ந்துள்ள கொடுமை கருவிகளில் சார்ந்த பல கூட்டுத்தரங்களைக் கொண்டுள்ளது.", + "Output Information": "வெளியீடு தகவல்", + "The output information will be displayed here.": "வெளியீடு தகவல் இங்கே காட்டப்படும்.", + "Inference": "கருத்து", + "Train": "பயிற்சி", + "Extra": "கூடுதல்", + "Merge Audios": "ஒரேபோனில் ஒன்றாக்குக", + "Processing": "செயலாக்கம்", + "Audio Analyzer": "ஆடியோ பகுப்பாய்வாளர்", + "Model Information": "மாதிரி தகவல்", + "Plugins": "பிளகின்கள்", + "Download": "பதிவிறக்கம்", + "Report a Bug": "பிழை அறிக்கை", + "Settings": "அமைப்புகள்", + "Preprocess": "முன்பாகவும்", + "Model Name": "மாதிரி பெயர்", + "Name of the new model.": "புதிய மாதிரி பெயர்.", + "Enter model name": "மாதிரி பெயரை உள்ளிடவும்", + "Dataset Path": "தரவுத்தொகுதி பாதை", + "Path to the dataset folder.": "தரவுத்தொகுதி கோப்புக்கு பாதை.", + "Refresh Datasets": "தரவுத்தொகுதிகளை புதுப்பிக்கவும்", + "Dataset Creator": "தரவுத்தொகுதி உருவாக்கி", + "Dataset Name": "தரவுத்தொகுதி பெயர்", + "Name of the new dataset.": "புதிய தரவுத்தொகுதி பெயர்.", + "Enter dataset name": "தரவுத்தொகுதி பெயரை உள்ளிடவும்", + "Upload Audio Dataset": "ஆடியோ தரவுத்தொகுதியை பதிவேற்றவும்", + "The audio file has been successfully added to the dataset. 
Please click the preprocess button.": "ஆடியோ கோப்பு தரவுத்தொகுதிக்கு வெற்றிகரமாக சேர்க்கப்பட்டுள்ளது. தயவுசெய்து முன்னிருப்பை அழுத்தவும்.", + "Enter dataset path": "தரவுத்தொகுதி பாதையை உள்ளிடவும்", + "Sampling Rate": "மாதிரி விகிதம்", + "The sampling rate of the audio files.": "ஆடியோ கோப்புகளின் மாதிரி விகிதம்.", + "Model Architecture": "RVC பதிப்பு", + "Version of the model architecture.": "மாதிரி RVC பதிப்பு.", + "Preprocess Dataset": "முன்பாகவும் தரவுத்தொகுதி", + + "Embedder Model": "உள்ளீடு மாதிரி", + "Model used for learning speaker embedding.": "பேச்சாளர் உள்ளீட்டை கற்க பயன்படுத்தப்படும் மாதிரி.", + "Extract": "எக்ஸ்ட்ராக்ட்", + "Hop Length": "ஹாப் நீளம்", + "Denotes the duration it takes for the system to transition to a significant pitch change. Smaller hop lengths require more time for inference but tend to yield higher pitch accuracy.": "கருத்துக்கு எவ்வளவு நேரம் எடுத்துக் கொள்கிறது என்றால், அது ஒரு முக்கிய பிச் மாற்றத்திற்கு அமைந்துகொள்கின்றது. சிறிய ஹாப் நீளங்களுக்கு அதிக நேரம் தேவைப்படுகின்றது ஆனால் அவை உயரமான பிச் சரிசெய்தியை உருவாக்க உதவுகின்றன.", + "Batch Size": "பேட்ச் அளவு", + "It's advisable to align it with the available VRAM of your GPU. A setting of 4 offers improved accuracy but slower processing, while 8 provides faster and standard results.": "உங்கள் GPU கிடைக்கும் கிடைச்சதை அவசியமாக உள்ளிட பரிந்திருக்கின்றது. 
4 என்ற அமைப்பு உயர்த்தப்பட்ட உறுதியுள்ள சொல்லத்தைக் கொண்டுள்ளது ஆனால் அதிக நேரம் பயன்படுகின்றது, 8 அமைப்பு விரைவாக மற்றும் நிலைக்குப் பொருத்தப்படுகிறது.", + "Save Every Epoch": "ஒவ்வொரு காலமும் சேமிக்கவும்", + "Determine at how many epochs the model will saved at.": "மாதிரி எதிர்காலங்களில் எத்தனை படிப்புகளில் மாதிரியைச் சேமிக்க விரும்புகிறீர்கள்.", + "Total Epoch": "மொத்த எபக்", + "Specifies the overall quantity of epochs for the model training process.": "மாதிரி பயிற்சி செய்திகளின் மொத்த அளவை குறிப்பிடுகிறது.", + "Pretrained": "பூர்வதயாரிக", + "Save Only Latest": "கடைசியே சேமிக்கவும்", + "Enabling this setting will result in the G and D files saving only their most recent versions, effectively conserving storage space.": "இந்த அமைப்பை இயக்கும் போது G மற்றும் D கோப்புகள் உங்கள் கடைசி பதிப்புகளைச் சேமிக்கும், வாயிலாக சேமிக்கப்படுகின்றன.", + "Save Every Weights": "ஒவ்வொரு எடைக்கும் சேமிக்கவும்", + "This setting enables you to save the weights of the model at the conclusion of each epoch.": "இந்த அமைப்பு உங்கள் மாதிரி பயிற்சி செய்தியின் முடிவில் மாதிரிகளை சேமிக்க அனுமதிக்கின்றது.", + "Custom Pretrained": "கஸ்டம் பூர்வதயாரிக", + "Utilizing custom pretrained models can lead to superior results, as selecting the most suitable pretrained models tailored to the specific use case can significantly enhance performance.": "கஸ்டம் பூர்வதயாரிக மாதிரிகளை பயன்படுத்துவது சிறந்த விளக்கங்களை தரலாம், குறிப்பிடுகின்ற குழப்பத்திற்கு ஏற்றதும் பூர்வதயாரிக மாதிரிகளைத் தேர்ந்தெடுக்க உடனே அந்தக் குழப்பத்தை அபூர்வமாக செயல்படுத்தலாம்.", + "Upload Pretrained Model": "பூர்வதயாரிக மாதிரி மோடெலை பதிவேற்றவும்", + "Refresh Custom Pretraineds": "கஸ்டம் பூர்வதயாரிகளை புதுப்பிக்கவும்", + "Pretrained Custom Settings": "கஸ்டம் பூர்வதயாரிக அமைப்புகள்", + "The file you dropped is not a valid pretrained file. Please try again.": "நீங்கள் பொருத்தவில்லை என்றால் பூர்வதயாரிக கோப்பு அல்ல. 
மீண்டும் முயற்சிக்கவும்.", + "Click the refresh button to see the pretrained file in the dropdown menu.": "கீழேயுள்ள பட்டி பட்டியில் பூர்வதயாரிக கோப்புக்கு உருவாக்க முயலுங்கள்.", + "Pretrained G Path": "கஸ்டம் பூர்வதயாரிக G பாதை", + "Pretrained D Path": "கஸ்டம் பூர்வதயாரிக D பாதை", + "GPU Settings": "GPU அமைப்புகள்", + "Sets advanced GPU settings, recommended for users with better GPU architecture.": "மேலும் முதிர்ச்சியான GPU அமைப்புகளை அமைக்கின்றது, உடனடியான GPU கருவிக்கு பரிந்திரமான பயனாளர்களுக்கு பரிந்துரிக்கப்படுகிறது.", + "GPU Custom Settings": "GPU கஸ்டம் அமைப்புகள்", + "GPU Number": "GPU எண்", + "0 to ∞ separated by -": "0 இரு ∞ பிரிவாக - வாக்கப்பட்டு", + "GPU Information": "GPU தகவல்", + "Pitch Guidance": "பிச் வழிநிரப்பல்", + "By employing pitch guidance, it becomes feasible to mirror the intonation of the original voice, including its pitch. This feature is particularly valuable for singing and other scenarios where preserving the original melody or pitch pattern is essential.": "பிச் வழிநிரப்பல் மூலம், மூல குரலின் ஒலிக்கோட்டைக் கண்டுகொள்வது சாத்தியமாகின்றது, அதன் பிச்சை கூட. இந்த அம்சம் குரல் பாடலுக்கும் மற்றும் உலாவிகளுக்கும் மூல இசை அல்லது பிச் முதிரையைக் காப்பாற்ற எளியதாக இருக்கும்.", + "Utilize pretrained models when training your own. 
This approach reduces training duration and enhances overall quality.": "உங்கள் பயிற்சியில் உங்கள் தனிப்பட்ட மாதிரிகளை பயன்படுத்துவது பூர்வதயாரிக மாதிரிகளை பயன்படுத்துவது குரல் பயிற்சி காலத்தை குறைக்கின்றது மற்றும் மொத்த தரவின் உயர்த்துத்தை அதிகரிக்கின்றது.", + "Extract Features": "அம்சங்கள் எடு", + "Overtraining Detector": "அதிகமாக பயிற்சிப்படுத்தும் அறிவுப்பால்", + "Detect overtraining to prevent the model from learning the training data too well and losing the ability to generalize to new data.": "பயிற்சிப்படுத்தும் தரவை மிகவும் நன்றாக அறியும் பாதுகாப்பு மூலம் மாதிரி பயிற்சிப்படுத்துதலை தடுக்க, புதிய தரவுக்கு பொதுவாக பொருத்தமாக மாற்ற அனுமதியை இழக்குகிறது.", + "Overtraining Detector Settings": "அதிக பயிற்சிப்படுத்தும் அறிவுப்பால் அமைப்புகள்", + "Overtraining Threshold": "அதிக பயிற்சிப்படுத்தும் அறிவுப்பால் அதிகம்", + "Set the maximum number of epochs you want your model to stop training if no improvement is detected.": "அதிகமாக பயிற்சிப்படுத்தும் தரவு அறியப்படாதால் உங்கள் மாதிரியின் பயிற்சிப்படுத்தும் மொத்த எண்ணிக்கையை அமைக்கவும்.", + + "Start Training": "பயிற்சி ஆரம்பிக்கவும்", + "Stop Training & Restart Applio": "பயிற்சி நிறுத்து & புதுப்பிக்க Applio", + "Generate Index": "சுருக்கம் உருவாக்கு", + + "Export Model": "ஏற்றுமதி மாதிரி", + "The button 'Upload' is only for google colab: Uploads the exported files to the ApplioExported folder in your Google Drive.": "'பதிவேற்று' பொத்தான்கள் உள்ளீட்டிற்கு மட்டுமே கூகுள் கோலாப் சேமிப்பகத்திற்கு கடைசியாக கூகுள் டிரைவில் உங்கள் ApplioExported கோப்புக்கு ஏற்றுமதிக்கும்.", + "Exported Pth file": "ஏற்றுமதிக்கப்பட்ட Pth கோப்பு", + "Exported Index file": "ஏற்றுமதிக்கப்பட்ட சுட்டி கோப்பு", + "Select the pth file to be exported": "ஏற்றுமதிக்க வேண்டிய pth கோப்பைத் தேர்ந்தெடுக்கவும்", + "Select the index file to be exported": "ஏற்றுமதிக்க வேண்டிய சுட்டி கோப்பைத் தேர்ந்தெடுக்கவும்", + "Upload": "பதிவேற்று", + + "Voice Model": "குரல் மாதிரி", + "Select the voice model to use for the conversion.": "மாற்றத்திற்கு பயன்படுத்த விரும்பும் 
குரல் மாதிரியை தேர்ந்தெடுக்கவும்.", + "Index File": "சுருக்க கோப்பு", + "Select the index file to use for the conversion.": "மாற்றத்திற்கு பயன்படுத்த உள்ள சுருக்க கோப்பை தேர்ந்தெடுக்கவும்.", + "Refresh": "புதுப்பிக்கவும்", + "Unload Voice": "குரல் அமைதி", + "Single": "ஒற்றை", + "Upload Audio": "ஒலியை பதிவேற்று", + "Select Audio": "ஒலியைத் தேர்ந்தெடு", + "Select the audio to convert.": "மாற்றுவதற்கு ஒலியைத் தேர்ந்தெடு.", + "Advanced Settings": "மேம்பாடு அமைப்புகள்", + "Clear Outputs (Deletes all audios in assets/audios)": "வெற்றிகளை அழித்தல் (assets/audios உள்ள அனைத்து ஒலிகளையும் நீக்கும்)", + "Custom Output Path": "கஸ்டம் வெற்றிப் பாதை", + "Output Path": "வெற்றி பாதை", + "The path where the output audio will be saved, by default in assets/audios/output.wav": "வெற்றிகள் உள்ளிடப்பட்ட ஒலியைச் சேமிக்கப்படும் பாதை, பொதுவாக assets/audios/output.wav இல்.", + "Split Audio": "ஒலியை பிரித்தல்", + "Split the audio into chunks for inference to obtain better results in some cases.": "கொலுசுகளாக ஒலியை பிரிக்க, சில நிலைகளில் சிறப்பு விளக்கங்களைப் பெற விரும்புகின்றது.", + "Autotune": "ஆட்டோடியூன்", + "Apply a soft autotune to your inferences, recommended for singing conversions.": "உங்கள் முன்னோடிகளுக்கு ஒரு மென்னுரை ஆட்டோடியூனை பயன்படுத்தவும், பாடல் மாற்றங்களுக்கு பரிந்துரிக்கப்படுகின்றது.", + "Clean Audio": "சுத்தமான ஒலி", + "Clean your audio output using noise detection algorithms, recommended for speaking audios.": "உங்கள் ஒலி வெற்றியை சுத்தமாக்க, பேசும் ஒலிகளுக்கு பரிந்துரிக்கப்படுகின்றது.", + "Clean Strength": "சுத்த வலிமை", + "Set the clean-up level to the audio you want, the more you increase it the more it will clean up, but it is possible that the audio will be more compressed.": "நீங்கள் விரும்பும் ஒலிக்கு சுத்தமாக்க விளக்கு, அதை அதிகரிக்கும்போது அது அதிகரிக்கும், ஆனால் ஒலி குறுகியாக இருக்கலாம்.", + "Pitch": "பிச்", + "Set the pitch of the audio, the higher the value, the higher the pitch.": "ஒலியின் பிச் அமைக்கவும், மதிப்பு உயரானதும் அதிகமாகும்.", + "Filter Radius": 
"குழப்பத்தின் அருகு", + "If the number is greater than or equal to three, employing median filtering on the collected tone results has the potential to decrease respiration.": "எண் மூலம் மூன்று அல்லது அதனை விட அதிகமாக இருந்தால், சேகரித்த இசை முடிவுகளில் இயலுமை குறைவாகும் என்று சொல்லப்படுகின்றது.", + "Search Feature Ratio": "தேடல் அம்ச விகிதம்", + "Influence exerted by the index file; a higher value corresponds to greater influence. However, opting for lower values can help mitigate artifacts present in the audio.": "இடைவரிசு கோப்பின் மூலம் உள்ள பாதியான ஒருவரிடத்திற்கு உருவாகும் அந்தக் கோப்பு; அதிக மதிப்பு அதிக உருவாகும் என்று அர்த்தம். எனவே, குறோக்கின் குறைந்த மதிப்புகளைத் தேர்வுசெய்வதால் ஒலியில் உள்ள கலப்புகளைத் தவிர்க்க உதவலாம்.", + "Volume Envelope": "அளவு என்வெலோப்", + "Substitute or blend with the volume envelope of the output. The closer the ratio is to 1, the more the output envelope is employed.": "வெற்றியின் அளவு என்வெலோப் இல் மாறியது அல்லது இணைந்தது. விளக்கு அந்த விகிதம் 1 க்கு அழைத்திருந்தால், வெற்றியின் அளவு என்வெலோப் பயன்படும்.", + "Protect Voiceless Consonants": "குரலின் இல்லாத ஸ்வரக்களைக் காப்பாற்றவும்", + "Safeguard distinct consonants and breathing sounds to prevent electro-acoustic tearing and other artifacts. Pulling the parameter to its maximum value of 0.5 offers comprehensive protection. However, reducing this value might decrease the extent of protection while potentially mitigating the indexing effect.": "எலக்ட்ரோ-ஒலி கொழுகு மற்றும் பிற கலப்புகளை தடுக்குவதற்கு விரிவான ஸ்வரக்களுக்கு மற்றும் சுவாசத் தானங்களுக்கு பாதுகாக்க. இந்த அளவுக்கு அதிகமாக 0.5 க்கு அழைத்துக் கொள்வது பொருத்தமான பாதுகாப்பை வழங்குகின்றது. ஆனால், இந்த மதிப்பை குறைந்ததாக்கின்றார் என்றால், பாதுகாப்புக்குரிய நிலை குறைந்துவிடப்படலாம் மற்றும் அதுவே இந்தக் குறோக்குனை பரிந்துரிக்கும் என்று எச்சரிக்கை தரகின்றது.", + "Pitch extraction algorithm": "பிச் எக்ஸ்டிரக்ஷன் அளவுத் தொகுப்பு", + "Pitch extraction algorithm to use for the audio conversion. 
The default algorithm is rmvpe, which is recommended for most cases.": "ஒலி மாற்றத்திற்கு பயன்படுத்த வேண்டிய பிச் எக்ஸ்டிரக்ஷன் அளவுத் தொகுப்பு. இயல்பான அளவுத் தொகுப்பு rmvpe ஆகும், இது அதிகமாக பரிந்துரிக்கப்படுகின்றது.", + + "Convert": "மாற்று", + "Export Audio": "ஒலியை ஏற்றுமதி செய்", + + "Batch": "பேட்ச்", + "Input Folder": "உள்ளிடும் கோப்பு", + "Select the folder containing the audios to convert.": "மாற்ற ஒலிகளைக் கொண்ட கோப்புகளைக் கொண்ட கோப்புக்கு தேர்ந்தெடு.", + "Enter input path": "உள்ளிடும் பாதையை உள்ளிடுக", + "Output Folder": "வெற்றி கோப்பு", + "Select the folder where the output audios will be saved.": "வெற்றிகளைச் சேமிக்கப்படும் கோப்புக்கு தேர்ந்தெடு.", + "Enter output path": "வெற்றியின் பாதையை உள்ளிடுக", + + "Get information about the audio": "ஒலியை பற்றிய தகவல் பெறுக", + + "## Voice Blender": "## குரல் பிளெண்டர்", + "Select two voice models, set your desired blend percentage, and blend them into an entirely new voice.": "இரண்டு குரல் மாதிரிகளைத் தேர்வு செய்து, விரும்பிய குரல் சதவீதம் அமைக்கவும், அவைகளை முழுமையாக ஒரு புதிய குரலாக பிளெண்டுகின்றன.", + "Voice Blender": "குரல் பிளெண்டர்", + "Drag and drop your model here": "உங்கள் மாதிரி இங்கே எழுதவும்", + "You can also use a custom path.": "நீங்கள் கஸ்டம் பாதையையும் பயன்படுத்தலாம்.", + "Blend Ratio": "குரல் சதவீதம்", + "Adjusting the position more towards one side or the other will make the model more similar to the first or second.": "ஒரு பக்கத்திற்கு அல்லது மற்றும் மற்றும் அதிகமாக செய்யும் உள்ளீடு இரட்டிப் பார்த்துக் கொள்ளுதல் மாதிரியாகின்றது.", + "Fusion": "ஐக்கியம்", + "Path to Model": "மாதிரிக்கு பாதை", + "Enter path to model": "மாதிரிக்கு பாதையை உள்ளிடுக", + "Model information to be placed": "இருந்து விடப்பட வேண்டிய மாதிரி தகவல்", + "Introduce the model information": "மாதிரி தகவல் அறிமுகம்", + "The information to be placed in the model (You can leave it blank or put anything).": "மாதிரிக்கு வைக்கப்பட வேண்டிய தகவல் (நீங்கள் அந்தச் செயலை விட அந்தச் செய்யாமல் அனைத்ததையும் வைக்கலாம்).", + "View model 
information": "மாதிரி தகவலைக் காண்க", + "Introduce the model pth path": "மாதிரி pth பாதையை உள்ளிடுக", + "View": "காண்க", + "Model extraction": "மாதிரி எடுத்தல்", + "Model conversion": "மாதிரி மாற்றம்", + "Pth file": "Pth கோப்பு", + "Output of the pth file": "Pth கோப்பின் வெளியேற்றம்", + "# How to Report an Issue on GitHub": "# GitHub-ல் ஒரு பிரச்சினையை புகாரளிக்குவது எப்படி", + "1. Click on the 'Record Screen' button below to start recording the issue you are experiencing.": "1. நீங்கள் அந்தப் பிரச்சினையை பரிசோதிக்கும் கீழே 'திரையை பதிகம் செய்யும்' பொத்தானை கிளிக் செய்யவும்.", + "2. Once you have finished recording the issue, click on the 'Stop Recording' button (the same button, but the label changes depending on whether you are actively recording or not).": "2. நீங்கள் அந்தப் பிரச்சினையை பரிசோதித்துக் கொண்டிருக்கின்றீர்கள், அந்தச் செய்யப்படும் பொத்தானை கிளிக் செய்யவும் (இது நீங்கள் சொல்லப்படும் பொத்தான், ஆனால் நீங்கள் எந்தவேணையும் செய்யக்கூடிய நிலையின் போது பொத்தானின் பெயர் மாறுகின்றது).", + "3. Go to [GitHub Issues](https://github.com/IAHispano/Applio/issues) and click on the 'New Issue' button.": "3. [GitHub Issues](https://github.com/IAHispano/Applio/issues) க்கு செல்லவும் மற்றும் 'புதிய பிரச்சினை' பொத்தானை கிளிக் செய்யவும்.", + "4. Complete the provided issue template, ensuring to include details as needed, and utilize the assets section to upload the recorded file from the previous step.": "4. 
வழுதுணர்ந்து, தேவையான விவரங்களைக் கொண்டு விரிவாக பிரச்சினை பதிவேடு செய்து, முந்தைய படித்த கோப்பை பதிவேடுக்கு பயன்படுத்தலாம்.", + "Record Screen": "திரையை பதிகம் செய்க", + "Record": "பதிகம் செய்க", + "Stop Recording": "பதிகம் நிறுத்துக", + "Introduce the model .pth path": "மாதிரி .pth பாதையை உள்ளிடுக", + "See Model Information": "மாதிரி தகவலைக் காண்க", + "## Download Model": "## மாதிரி பதிவிறக்கம்", + "Model Link": "மாதிரி இணைப்பு", + "Introduce the model link": "மாதிரி இணைப்பை உள்ளிடுக", + "Download Model": "மாதிரி பதிவிறக்கம்", + "## Drop files": "## கோப்புகளை விழுக", + "Drag your .pth file and .index file into this space. Drag one and then the other.": "உங்கள் .pth கோப்பு மற்றும் .index கோப்பை இந்த இடத்திற்கு எழுதுங்கள். ஒருவருக்கு பிறகு ஒருவருக்கு எழுதுங்கள்.", + "## Search Model": "## மாதிரி தேடு", + "Search": "தேடு", + "Introduce the model name to search.": "தேடுவதற்கு மாதிரிப் பெயரை அறிமுகப்படுத்து.", + "We couldn't find models by that name.": "அந்த பெயரில் மாதிரிகளைக் கண்டுபிடிக்க முடியவில்லை.", + + "TTS Voices": "TTS குரல்கள்", + "Select the TTS voice to use for the conversion.": "மாற்றத்திற்கு பயன்படுத்த உள்ள TTS குரலை தேர்ந்தெடுக்கவும்.", + "Text to Synthesize": "சிந்தனை செய்ய உள்ள உரை", + "Enter the text to synthesize.": "சிந்தனை செய்ய உள்ள உரையை உள்ளிடுக.", + "Or you can upload a .txt file": "அல்லது .txt கோப்பை பதிவேற்றலாம்", + "Enter text to synthesize": "சிந்தனை செய்ய உள்ள உரையை உள்ளிடுக", + "Output Path for TTS Audio": "TTS குரலுக்கான வெளியேற்ற பாதை", + "Output Path for RVC Audio": "RVC குரலுக்கான வெளியேற்ற பாதை", + "Enable Applio integration with Discord presence": "Discord உள்ளிட்டது உள்ளிடத்துடன் Applio ஒருவருக்கு இயங்குதல் இயல்புநிலை செய்தியை இயக்குங்கள்", + "It will activate the possibility of displaying the current Applio activity in Discord.": "இது Applio செயல்திறனை Discord-ல் காண்பிக்க முடியும்.", + "Enable Applio integration with applio.org/models using flask": "flask ஐப் பயன்படுத்தி applio.org/models உடன் Applio ஒருவருக்கு இயங்குதல் 
இயல்புநிலை செய்தியை இயக்குங்கள்", + "It will activate the possibility of downloading models with a click from the website.": "இது இணையத்திலிருந்து ஒரு கிளிக்குட்டுடன் மாதிரிகளை பதிவிறக்க முடியும்.", + "Enable fake GPU": "கப்பூ ஜி.பி.யூ ஐ இயக்கு", + "Training is currently unsupported due to the absence of a GPU. To activate the training tab, navigate to the settings tab and enable the 'Fake GPU' option.": "ஒரு ஜி.பி.யூ இல்லாமல் தற்போது பயிற்சிப்படுத்தல் ஆதரிக்கப்படவில்லை. பயிற்சிக்கு பட்டி செயல்முறையை செயலாக்க, அமைப்புகள் பட்டியலில் செல்ல, 'பெயர் ஜி.பி.யூ' விருப்பத்தை இயக்கவும்.", + "Activates the train tab. However, please note that this device lacks GPU capabilities, hence training is not supported. This option is only for testing purposes. (This option will restart Applio)": "பயிற்சிக்கு பட்டி செயலாக்கிறது. எனவே, இந்த சாதனம் ஜி.பி.யூ திறன் இல்லையாம், அதனால் பயிற்சி ஆதரிக்கப்படவில்லை. இந்த விருப்பம் மட்டுமே சோதனை காரணங்களுக்காக உள்ளது. (இந்த விருப்பம் Applio ஐ மீண்டும் திரும்பியிருப்பதற்காக)", + "Theme": "தீமா", + "Select the theme you want to use. (Requires restarting Applio)": "நீங்கள் பயன்படுத்த விரும்பும் தீமையை தேர்ந்தெடுக்கவும். (Applio-ஐ மீளவே போகும்)", + "Language": "மொழி", + "Select the language you want to use. (Requires restarting Applio)": "நீங்கள் பயன்படுத்த விரும்பும் மொழியை தேர்ந்தெடுக்கவும். 
(Applio-ஐ மீளவே போகும்)", + "Plugin Installer": "பிளகின் நிறுவி", + "Drag your plugin.zip to install it": "உங்கள் plugin.zip கோப்பை இதுக்கு இழுக்கவும் அதை நிறுவுக", + "Version Checker": "பதிப்பு சரிபார்க்கல்", + "Check which version of Applio is the latest to see if you need to update.": "நீங்கள் புதியதாகப் புதுப்பிக்க வேண்டும் என்பதை பார்க்க, Applio இன் எந்த பதிப்பு சரிபார்க்கவும்.", + "Check for updates": "புதுப்பிக்கவும்" } \ No newline at end of file diff --git a/assets/i18n/languages/ta_TA.json b/assets/i18n/languages/ta_TA.json index f457564074033f7fd5659cddb3655d7df43d9e06..27cd4e3db52fbced9c6b3f1e2082fa41cd26adb4 100644 --- a/assets/i18n/languages/ta_TA.json +++ b/assets/i18n/languages/ta_TA.json @@ -1,5 +1,5 @@ { - "Ultimate voice cloning tool, meticulously optimized for unrivaled power, modularity, and user-friendly experience.": "இறுதி குரல் குளோனிங் கருவி, நிகரற்ற சக்தி, மாடுலாரிட்டி மற்றும் பயனர் நட்பு அனுபவத்திற்காக உன்னிப்பாக உகந்ததாக உள்ளது.", + "VITS-based Voice Conversion focused on simplicity, quality and performance.": "இறுதி குரல் குளோனிங் கருவி, நிகரற்ற சக்தி, மாடுலாரிட்டி மற்றும் பயனர் நட்பு அனுபவத்திற்காக உன்னிப்பாக உகந்ததாக உள்ளது.", "This section contains some extra utilities that often may be in experimental phases.": "இந்த பிரிவில் சில கூடுதல் பயன்பாடுகள் உள்ளன, அவை பெரும்பாலும் சோதனை கட்டங்களில் இருக்கலாம்.", "Output Information": "வெளியீட்டு தகவல்", "The output information will be displayed here.": "வெளியீட்டு தகவல் இங்கே காட்டப்படும்.", @@ -30,8 +30,8 @@ "Enter dataset path": "தரவுத்தொகுப்பு பாதையை உள்ளிடவும்", "Sampling Rate": "மாதிரி மதிப்பீடு", "The sampling rate of the audio files.": "ஆடியோ கோப்புகளின் மாதிரி விகிதம்.", - "RVC Version": "RVC பதிப்பு", - "The RVC version of the model.": "மாடலின் RVC பதிப்பு.", + "Model Architecture": "RVC பதிப்பு", + "Version of the model architecture.": "மாடலின் RVC பதிப்பு.", "Preprocess Dataset": "செயல்முறைக்கு முந்தைய தரவுத்தொகுப்பு", "Extract": "பிரித்தெடுத்தல்", "Hop Length": "ஹாப் நீளம்", 
diff --git a/assets/i18n/languages/te_TE.json b/assets/i18n/languages/te_TE.json index c8cace5ac719bd6ce4af59ba72acd7e5cdbfb8c9..290be7405222f98da9e832ffbe4ffa8da53493ec 100644 --- a/assets/i18n/languages/te_TE.json +++ b/assets/i18n/languages/te_TE.json @@ -1,5 +1,5 @@ { - "Ultimate voice cloning tool, meticulously optimized for unrivaled power, modularity, and user-friendly experience.": "అల్టిమేట్ వాయిస్ క్లోనింగ్ టూల్, సాటిలేని శక్తి, మాడ్యులారిటీ మరియు వినియోగదారు-స్నేహపూర్వక అనుభవం కోసం జాగ్రత్తగా ఆప్టిమైజ్ చేయబడింది.", + "VITS-based Voice Conversion focused on simplicity, quality and performance.": "అల్టిమేట్ వాయిస్ క్లోనింగ్ టూల్, సాటిలేని శక్తి, మాడ్యులారిటీ మరియు వినియోగదారు-స్నేహపూర్వక అనుభవం కోసం జాగ్రత్తగా ఆప్టిమైజ్ చేయబడింది.", "This section contains some extra utilities that often may be in experimental phases.": "ఈ విభాగంలో కొన్ని అదనపు ఉపయోగాలు ఉన్నాయి, అవి తరచుగా ప్రయోగాత్మక దశలలో ఉండవచ్చు.", "Output Information": "అవుట్ పుట్ సమాచారం", "The output information will be displayed here.": "అవుట్ పుట్ సమాచారం ఇక్కడ ప్రదర్శించబడుతుంది.", @@ -30,8 +30,8 @@ "Enter dataset path": "డేటాసెట్ మార్గాన్ని నమోదు చేయండి", "Sampling Rate": "నమూనా రేటు", "The sampling rate of the audio files.": "ఆడియో ఫైల్స్ యొక్క నమూనా రేటు.", - "RVC Version": "RVC Version", - "The RVC version of the model.": "మోడల్ యొక్క ఆర్ విసి వెర్షన్.", + "Model Architecture": "Model Architecture", + "Version of the model architecture.": "మోడల్ యొక్క ఆర్ విసి వెర్షన్.", "Preprocess Dataset": "ప్రీప్రాసెస్ Dataset", "Extract": "ఉద్ధరించు", "Hop Length": "హాప్ పొడవు", diff --git a/assets/i18n/languages/th_TH.json b/assets/i18n/languages/th_TH.json index 904ce80497d1dd661d2800ff9fe43457eb8749f9..ba5ea4f11d2ffc331f0f8d455021abf5403eb07d 100644 --- a/assets/i18n/languages/th_TH.json +++ b/assets/i18n/languages/th_TH.json @@ -1,5 +1,5 @@ { - "Ultimate voice cloning tool, meticulously optimized for unrivaled power, modularity, and user-friendly experience.": 
"สุดยอดเครื่องมือโคลนเสียงที่ได้รับการปรับให้เหมาะสมอย่างพิถีพิถันเพื่อพลังที่ไม่มีใครเทียบได้ความเป็นโมดูลและประสบการณ์ที่ใช้งานง่าย", + "VITS-based Voice Conversion focused on simplicity, quality and performance.": "สุดยอดเครื่องมือโคลนเสียงที่ได้รับการปรับให้เหมาะสมอย่างพิถีพิถันเพื่อพลังที่ไม่มีใครเทียบได้ความเป็นโมดูลและประสบการณ์ที่ใช้งานง่าย", "This section contains some extra utilities that often may be in experimental phases.": "ส่วนนี้ประกอบด้วยยูทิลิตี้พิเศษบางอย่างที่มักจะอยู่ในขั้นตอนการทดลอง", "Output Information": "ข้อมูลเอาต์พุต", "The output information will be displayed here.": "ข้อมูลผลลัพธ์จะแสดงที่นี่", @@ -30,8 +30,8 @@ "Enter dataset path": "ป้อนเส้นทางชุดข้อมูล", "Sampling Rate": "Samp อัตราลิง", "The sampling rate of the audio files.": "อัตราการสุ่มตัวอย่างของไฟล์เสียง", - "RVC Version": "รุ่น RVC", - "The RVC version of the model.": "รุ่น RVC ของรุ่น", + "Model Architecture": "รุ่น RVC", + "Version of the model architecture.": "รุ่น RVC ของรุ่น", "Preprocess Dataset": "ชุดข้อมูลประมวลผลล่วงหน้า", "Extract": "สกัด", "Hop Length": "ความยาวกระโดด", diff --git a/assets/i18n/languages/tr_TR.json b/assets/i18n/languages/tr_TR.json index c315869327800bc6d51d52b1b287a971ee0ebc6d..614de44ba850dfd997ebf31df2788d34300743fb 100644 --- a/assets/i18n/languages/tr_TR.json +++ b/assets/i18n/languages/tr_TR.json @@ -1,5 +1,5 @@ { - "Ultimate voice cloning tool, meticulously optimized for unrivaled power, modularity, and user-friendly experience.": "Üstün ses kopyalama aracı, rakipsiz güç, modülerlik ve kullanıcı dostu deneyim için özenle optimize edilmiştir.\n[Türkçe çeviri: Enes](https://discord.com/users/1140031358006202468)", + "VITS-based Voice Conversion focused on simplicity, quality and performance.": "Üstün ses kopyalama aracı, rakipsiz güç, modülerlik ve kullanıcı dostu deneyim için özenle optimize edilmiştir.\n[Türkçe çeviri: Enes](https://discord.com/users/1140031358006202468)", "This section contains some extra utilities that often may be in 
experimental phases.": "Bu bölümde, genellikle deneysel aşamalarda olabilecek bazı ekstra yardımcı programlar bulunur.", "Output Information": "Çıktı Bilgileri", "The output information will be displayed here.": "Çıktı bilgileri burada görüntülenecektir.", @@ -32,8 +32,8 @@ "Enter dataset path": "Veri kümesi yolunu girin", "Sampling Rate": "Örnekleme Hızı", "The sampling rate of the audio files.": "Ses dosyalarının örnekleme hızı.", - "RVC Version": "RVC Sürümü", - "The RVC version of the model.": "Modelin RVC sürümü.", + "Model Architecture": "RVC Sürümü", + "Version of the model architecture.": "Modelin RVC sürümü.", "Preprocess Dataset": "Veri Kümesini İşlem Öncesinden Geçir", "Embedder Model": "Gömme Modeli", diff --git a/assets/i18n/languages/uk_UK.json b/assets/i18n/languages/uk_UK.json index 9f16903513c0e463f5c5ee7ecd13effb957aa7bb..ea2441cfffad635d76330de546830bc184004fb6 100644 --- a/assets/i18n/languages/uk_UK.json +++ b/assets/i18n/languages/uk_UK.json @@ -1,5 +1,5 @@ { - "Ultimate voice cloning tool, meticulously optimized for unrivaled power, modularity, and user-friendly experience.": "Найкращий інструмент для клонування голосу, ретельно оптимізований для неперевершеної потужності, модульності та зручного використання.", + "VITS-based Voice Conversion focused on simplicity, quality and performance.": "Найкращий інструмент для клонування голосу, ретельно оптимізований для неперевершеної потужності, модульності та зручного використання.", "This section contains some extra utilities that often may be in experimental phases.": "Цей розділ містить деякі додаткові утиліти, які часто можуть перебувати в експериментальних стадіях.", "Output Information": "Вихідна інформація", "The output information will be displayed here.": "Тут буде відображена вихідна інформація.", @@ -32,8 +32,8 @@ "Enter dataset path": "Введіть шлях до датасету", "Sampling Rate": "Частота дискретизації", "The sampling rate of the audio files.": "Частота дискретизації аудіофайлів.", - 
"RVC Version": "Версія RVC", - "The RVC version of the model.": "Версія моделі RVC.", + "Model Architecture": "Версія RVC", + "Version of the model architecture.": "Версія моделі RVC.", "Preprocess Dataset": "Обробити датасет", "Embedder Model": "Модель розгортання", diff --git a/assets/i18n/languages/ur_UR.json b/assets/i18n/languages/ur_UR.json index c8e4b5373d5458d2ae84bf98204b2aaa73f52c5b..1b5e37c1a3abed8b81477d886c9d5a3767b9f4d1 100644 --- a/assets/i18n/languages/ur_UR.json +++ b/assets/i18n/languages/ur_UR.json @@ -1,5 +1,5 @@ { - "Ultimate voice cloning tool, meticulously optimized for unrivaled power, modularity, and user-friendly experience.": "حتمی صوتی کلوننگ ٹول ، بے مثال طاقت ، ماڈیولریٹی ، اور صارف دوست تجربے کے لئے احتیاط سے بہتر بنایا گیا ہے۔", + "VITS-based Voice Conversion focused on simplicity, quality and performance.": "حتمی صوتی کلوننگ ٹول ، بے مثال طاقت ، ماڈیولریٹی ، اور صارف دوست تجربے کے لئے احتیاط سے بہتر بنایا گیا ہے۔", "This section contains some extra utilities that often may be in experimental phases.": "اس سیکشن میں کچھ اضافی افادیت شامل ہیں جو اکثر تجرباتی مراحل میں ہوسکتی ہیں۔", "Output Information": "آؤٹ پٹ معلومات", "The output information will be displayed here.": "آؤٹ پٹ کی معلومات یہاں ظاہر کی جائے گی۔", @@ -30,8 +30,8 @@ "Enter dataset path": "ڈیٹا سیٹ کا راستہ درج کریں", "Sampling Rate": "نمونے لینے کی شرح", "The sampling rate of the audio files.": "آڈیو فائلوں کے نمونے لینے کی شرح۔", - "RVC Version": "RVC Version", - "The RVC version of the model.": "ماڈل کا آر وی سی ورژن۔", + "Model Architecture": "Model Architecture", + "Version of the model architecture.": "ماڈل کا آر وی سی ورژن۔", "Preprocess Dataset": "پری پروسیس ڈیٹا سیٹ", "Extract": "نکالنا", "Hop Length": "ہاپ کی لمبائی", diff --git a/assets/i18n/languages/vi_VI.json b/assets/i18n/languages/vi_VI.json index 4649041d0c54ffa0261e5e06e12433eab380535e..9346cc9f3cf682a803a5b65a978ad5d6f77d0017 100644 --- a/assets/i18n/languages/vi_VI.json +++ 
b/assets/i18n/languages/vi_VI.json @@ -1,5 +1,5 @@ { - "Ultimate voice cloning tool, meticulously optimized for unrivaled power, modularity, and user-friendly experience.": "Công cụ nhân bản giọng nói tối ưu, được tối ưu hóa tỉ mỉ cho sức mạnh vô song, tính mô-đun và trải nghiệm thân thiện với người dùng.", + "VITS-based Voice Conversion focused on simplicity, quality and performance.": "Công cụ nhân bản giọng nói tối ưu, được tối ưu hóa tỉ mỉ cho sức mạnh vô song, tính mô-đun và trải nghiệm thân thiện với người dùng.", "This section contains some extra utilities that often may be in experimental phases.": "Phần này chứa một số tiện ích bổ sung thường có thể đang trong giai đoạn thử nghiệm.", "Output Information": "Thông tin đầu ra", "The output information will be displayed here.": "Thông tin đầu ra sẽ được hiển thị ở đây.", @@ -30,8 +30,8 @@ "Enter dataset path": "Nhập đường dẫn tập dữ liệu", "Sampling Rate": "Tỷ lệ lấy mẫu", "The sampling rate of the audio files.": "Tốc độ lấy mẫu của các tệp âm thanh.", - "RVC Version": "Phiên bản RVC", - "The RVC version of the model.": "Phiên bản RVC của mô hình.", + "Model Architecture": "Phiên bản RVC", + "Version of the model architecture.": "Phiên bản RVC của mô hình.", "Preprocess Dataset": "Tập dữ liệu tiền xử lý", "Extract": "Trích", "Hop Length": "Chiều dài hop", diff --git a/assets/i18n/languages/zh_CN.json b/assets/i18n/languages/zh_CN.json index 9fff7cb16c0106df0a428cdace92adc02d87599b..329dfc5cfac56240bb1a549a57a89ae681c9cbdf 100644 --- a/assets/i18n/languages/zh_CN.json +++ b/assets/i18n/languages/zh_CN.json @@ -1,5 +1,5 @@ { - "Ultimate voice cloning tool, meticulously optimized for unrivaled power, modularity, and user-friendly experience.": "终极语音克隆工具,经过精心优化,具有无与伦比的功能、模块化和用户友好的体验。", + "VITS-based Voice Conversion focused on simplicity, quality and performance.": "终极语音克隆工具,经过精心优化,具有无与伦比的功能、模块化和用户友好的体验。", "This section contains some extra utilities that often may be in experimental phases.": 
"本节包含一些额外的实用程序,这些实用程序通常处于实验阶段。", "Output Information": "输出信息", "Inference": "推理", @@ -17,7 +17,7 @@ "Dataset Path": "数据集路径", "Enter dataset path": "输入数据集路径", "Sampling Rate": "采样率", - "RVC Version": "RVC 版本", + "Model Architecture": "RVC 版本", "Preprocess Dataset": "预处理数据集", "Extract": "提取", "Hop Length": "跳跃长度", diff --git a/assets/installation_checker.py b/assets/installation_checker.py index 75a14af74eea94dbd69096ec71489f0a82ccd525..2d665a509ca7a43e43b98b6e0ff785628eeb313a 100644 --- a/assets/installation_checker.py +++ b/assets/installation_checker.py @@ -17,22 +17,22 @@ def check_installation(): current_drive = os.path.splitdrive(now_dir)[0] if current_drive.upper() != system_drive.upper(): raise InstallationError( - f"Error: Current working directory is not on the default system drive ({system_drive}). Please move Applio in the correct drive." + f"Installation Error: The current working directory is on drive {current_drive}, but the default system drive is {system_drive}. Please move Applio to the {system_drive} drive." ) except: pass else: if "OneDrive" in now_dir: raise InstallationError( - "Error: Current working directory is on OneDrive. Please move Applio in another folder." + "Installation Error: The current working directory is located in OneDrive. Please move Applio to a different folder." ) elif " " in now_dir: raise InstallationError( - "Error: Current working directory contains spaces. Please move Applio in another folder." + "Installation Error: The current working directory contains spaces. Please move Applio to a folder without spaces in its path." ) try: now_dir.encode("ascii") except UnicodeEncodeError: raise InstallationError( - "Error: Current working directory contains non-ASCII characters. Please move Applio in another folder." + "Installation Error: The current working directory contains non-ASCII characters. Please move Applio to a folder with only ASCII characters in its path." 
) diff --git a/assets/presets/Default.json b/assets/presets/Default.json new file mode 100644 index 0000000000000000000000000000000000000000..9f94448ca5fefd38a6b9761c6ca1e031c242259a --- /dev/null +++ b/assets/presets/Default.json @@ -0,0 +1,7 @@ +{ + "pitch": 0, + "filter_radius": 3, + "index_rate": 0.75, + "rms_mix_rate": 1, + "protect": 0.5 +} \ No newline at end of file diff --git a/assets/presets/Good for Anything.json b/assets/presets/Good for Anything.json new file mode 100644 index 0000000000000000000000000000000000000000..8c6d878a0b8de2a5d808c84e0183cc5812b4c751 --- /dev/null +++ b/assets/presets/Good for Anything.json @@ -0,0 +1,7 @@ +{ + "pitch": 0, + "filter_radius": 3, + "index_rate": 0.75, + "rms_mix_rate": 0.3, + "protect": 0.33 +} \ No newline at end of file diff --git a/assets/presets/Music.json b/assets/presets/Music.json new file mode 100644 index 0000000000000000000000000000000000000000..6ac821bb48d00013df68be3df18a84145de644db --- /dev/null +++ b/assets/presets/Music.json @@ -0,0 +1,7 @@ +{ + "pitch": 0, + "filter_radius": 3, + "index_rate": 0.75, + "rms_mix_rate": 0.25, + "protect": 0.33 +} \ No newline at end of file diff --git a/assets/themes/loadThemes.py b/assets/themes/loadThemes.py index 0fe66c015880b9bfad1dba355df83738db8fabeb..fcea2c9fbb8164f68776833e09123e0657a1bb79 100644 --- a/assets/themes/loadThemes.py +++ b/assets/themes/loadThemes.py @@ -110,7 +110,7 @@ def load_json(): module = importlib.import_module(selected_file[:-3]) obtained_class = getattr(module, class_name) instance = obtained_class() - print(f"Theme Loaded: {class_name}") + print(f"Theme {class_name} successfully loaded.") return instance elif selected_file == None and class_name: return class_name diff --git a/assets/version_checker.py b/assets/version_checker.py index 3cd0a2e0b14cc28df73cdc1d2b8568ff3f90fef1..1a364f9604d52d5aa464e8629b2cfda7a28222ca 100644 --- a/assets/version_checker.py +++ b/assets/version_checker.py @@ -1,4 +1,5 @@ -import os, sys +import os 
+import sys import json import requests @@ -9,36 +10,48 @@ config_file = os.path.join(now_dir, "assets", "config.json") def load_local_version(): - with open(config_file, "r", encoding="utf8") as file: - config = json.load(file) - return config["version"] + try: + with open(config_file, "r", encoding="utf8") as file: + config = json.load(file) + return config["version"] + except (FileNotFoundError, json.JSONDecodeError) as error: + print(f"Error loading local version: {error}") + return None def obtain_tag_name(): url = "https://api.github.com/repos/IAHispano/Applio/releases/latest" + session = requests.Session() try: - response = requests.get(url) + response = session.get(url) response.raise_for_status() data = response.json() - tag_name = data["tag_name"] - - return tag_name + return data.get("tag_name") - except requests.exceptions.RequestException as e: - print(f"Error: {e}") + except requests.exceptions.RequestException as error: + print(f"Error obtaining online version: {error}") return None def compare_version(): local_version = load_local_version() + if not local_version: + return "Local version could not be determined." + online_version = obtain_tag_name() + if not online_version: + return "Online version could not be determined. Make sure you have an internet connection." + elements_online_version = list(map(int, online_version.split("."))) elements_local_version = list(map(int, local_version.split("."))) for online, local in zip(elements_online_version, elements_local_version): if local < online: - return f"Your local {local_version} version is older than {online_version} the latest version" + return f"Your local version {local_version} is older than the latest version {online_version}." + + if len(elements_online_version) > len(elements_local_version): + return f"Your local version {local_version} is older than the latest version {online_version}." return f"Your local version {local_version} is the latest version." 
diff --git a/assets/zluda/README.md b/assets/zluda/README.md new file mode 100644 index 0000000000000000000000000000000000000000..7c9b398372c30bc3307ed8763cd5622d53e34854 --- /dev/null +++ b/assets/zluda/README.md @@ -0,0 +1,63 @@ +## Installation and Setup Instructions + +### 1. Install VC++ Runtime +Download and install the VC++ Runtime from [this link](https://aka.ms/vs/17/release/vc_redist.x64.exe). + +### 2. Install HIP SDK + +- **Read the [System Requirements](https://rocm.docs.amd.com/projects/install-on-windows/en/develop/reference/system-requirements.html)** + + Check the **"Windows-supported GPUs"** section to determine the correct installation steps: + + - **If your GPU has a green checkbox in the HIP SDK column:** + - **Install either v6.1.2 or v5.7.1 HIP SDK** + - Download from [AMD ROCm Hub](https://www.amd.com/en/developer/resources/rocm-hub/hip-sdk.html) + + - **If your GPU has a red cross in the HIP SDK column:** + - **Install v5.7.1 HIP SDK** + - For 6700, 6700XT, 6750XT, download [this archive](https://github.com/brknsoul/ROCmLibs/raw/main/Optimised_ROCmLibs_gfx1031.7z) + - For 6600, 6600XT, 6650XT, download [this archive](https://github.com/brknsoul/ROCmLibs/raw/main/Optimised_ROCmLibs_gfx1032.7z) + + **Steps:** + 1. Rename `C:\Program Files\AMD\ROCm\5.7\bin\rocblas\library` to `library.old` + 2. Create a new folder named `library` + 3. Unzip the content of the archive into that folder + + - **If your GPU is not listed:** + - **Install v5.7.1 HIP SDK** + 1. Google "techpowerup your_gpu" to find the value of "Shader ISA" (gfxnnnn). Only `gfx803/900/906/1010/1011/1012/1030/1100/1101/1102` are supported. + 2. Download [this archive](https://github.com/brknsoul/ROCmLibs/raw/main/ROCmLibs.7z) + + **Steps:** + 1. Rename `C:\Program Files\AMD\ROCm\5.7\bin\rocblas\library` to `library.old` + 2. Unzip the content of the archive into `C:\Program Files\AMD\ROCm\5.7\bin\rocblas\` + +### 3. Patching Applio + +1. 
**Move all `.bat` files from the `zluda` folder to the root directory of Applio.** + +2. **For Precompiled Applio:** + - Run `reinstall-torch.bat` to patch Applio. + +3. **For Applio Source Code:** + 1. Open `run-install.bat` and update the Torch versions on line 67: + ```sh + pip install torch==2.1.1 torchvision==0.16.1 torchaudio==2.1.1 --index-url https://download.pytorch.org/whl/cu118 + ``` + 2. Execute `run-install.bat` to install the required libraries. + 3. Manually apply the code changes from the pull request. + +### 4. Download Zluda and Patch Torch Libraries + +1. **For HIP SDK 5.7:** + - Run `patch-zluda-hip57.bat`. + - Add `C:\Program Files\AMD\ROCm\5.7\bin` to your system's Path environment variable. + +2. **For HIP SDK 6.1:** + - Run `patch-zluda-hip61.bat`. + - Add `C:\Program Files\AMD\ROCm\6.1\bin` to your system's Path environment variable. + +### 5. Starting Applio + +- Execute `run-applio-amd.bat` to start Applio. + diff --git a/assets/zluda/patch-zluda-hip57.bat b/assets/zluda/patch-zluda-hip57.bat new file mode 100644 index 0000000000000000000000000000000000000000..1973a59a30b49da680b082b202281c691784e16f --- /dev/null +++ b/assets/zluda/patch-zluda-hip57.bat @@ -0,0 +1,7 @@ +rmdir /S /q zluda +curl -s -L https://github.com/lshqqytiger/ZLUDA/releases/download/rel.11cc5844514f93161e0e74387f04e2c537705a82/ZLUDA-windows-amd64.zip > zluda.zip +tar -xf zluda.zip +del zluda.zip +copy zluda\cublas.dll env\Lib\site-packages\torch\lib\cublas64_11.dll /y +copy zluda\cusparse.dll env\Lib\site-packages\torch\lib\cusparse64_11.dll /y +copy zluda\nvrtc.dll env\Lib\site-packages\torch\lib\nvrtc64_112_0.dll /y diff --git a/assets/zluda/patch-zluda-hip61.bat b/assets/zluda/patch-zluda-hip61.bat new file mode 100644 index 0000000000000000000000000000000000000000..1378f9a6bcca9b03c9020e2011822e07b09e54fc --- /dev/null +++ b/assets/zluda/patch-zluda-hip61.bat @@ -0,0 +1,7 @@ +rmdir /S /q zluda +curl -s -L
https://github.com/lshqqytiger/ZLUDA/releases/download/rel.86cdab3b14b556e95eafe370b8e8a1a80e8d093b/ZLUDA-windows-amd64.zip > zluda.zip +tar -xf zluda.zip +del zluda.zip +copy zluda\cublas.dll env\Lib\site-packages\torch\lib\cublas64_11.dll /y +copy zluda\cusparse.dll env\Lib\site-packages\torch\lib\cusparse64_11.dll /y +copy zluda\nvrtc.dll env\Lib\site-packages\torch\lib\nvrtc64_112_0.dll /y diff --git a/assets/zluda/reinstall-torch.bat b/assets/zluda/reinstall-torch.bat new file mode 100644 index 0000000000000000000000000000000000000000..5dab777944fc1fabac91e148a6368a3cca5b26cc --- /dev/null +++ b/assets/zluda/reinstall-torch.bat @@ -0,0 +1,11 @@ +set "principal=%cd%" +set "URL_EXTRA=https://huggingface.co/IAHispano/applio/resolve/main" +set "CONDA_ROOT_PREFIX=%UserProfile%\Miniconda3" +set "INSTALL_ENV_DIR=%principal%\env" +set "MINICONDA_DOWNLOAD_URL=https://repo.anaconda.com/miniconda/Miniconda3-py39_23.9.0-0-Windows-x86_64.exe" +set "CONDA_EXECUTABLE=%CONDA_ROOT_PREFIX%\Scripts\conda.exe" + +call "%CONDA_ROOT_PREFIX%\condabin\conda.bat" activate "%INSTALL_ENV_DIR%" +rem pip uninstall torch torchvision torchaudio -y +rem pip install torch==2.1.1 torchvision==0.16.1 torchaudio==2.1.1 --index-url https://download.pytorch.org/whl/cu118 +call "%CONDA_ROOT_PREFIX%\condabin\conda.bat" deactivate diff --git a/assets/zluda/run-applio-amd.bat b/assets/zluda/run-applio-amd.bat new file mode 100644 index 0000000000000000000000000000000000000000..0fe41e859526a7ef21422b19a3fa797d2738bd67 --- /dev/null +++ b/assets/zluda/run-applio-amd.bat @@ -0,0 +1,14 @@ +@echo off +setlocal +title Applio + +if not exist env ( + echo Please run 'run-install.bat' first to set up the environment. + pause + exit /b 1 +) + +set HIP_VISIBLE_DEVICES="0" +zluda\zluda.exe -- env\python.exe app.py --open +echo. 
+pause \ No newline at end of file diff --git a/core.py b/core.py index 67c56ecf36b7b5804c2327590ba8bee2a6620671..8084b6b7cd96572ccad8ae404eee3fc2f3c593b8 100644 --- a/core.py +++ b/core.py @@ -1,5 +1,4 @@ import os -import spaces import sys import json import argparse @@ -14,7 +13,6 @@ current_script_directory = os.path.dirname(os.path.realpath(__file__)) logs_path = os.path.join(current_script_directory, "logs") from rvc.lib.tools.prerequisites_download import prequisites_download_pipeline -from rvc.train.extract.preparing_files import generate_config, generate_filelist from rvc.train.process.model_blender import model_blender from rvc.train.process.model_information import model_information from rvc.train.process.extract_small_model import extract_small_model @@ -51,7 +49,6 @@ def get_config(): # Infer -@spaces.GPU(duration=120) def run_infer_script( pitch: int, filter_radius: int, @@ -73,8 +70,52 @@ def run_infer_script( f0_file: str, embedder_model: str, embedder_model_custom: str = None, + formant_shifting: bool = False, + formant_qfrency: float = 1.0, + formant_timbre: float = 1.0, + post_process: bool = False, + reverb: bool = False, + pitch_shift: bool = False, + limiter: bool = False, + gain: bool = False, + distortion: bool = False, + chorus: bool = False, + bitcrush: bool = False, + clipping: bool = False, + compressor: bool = False, + delay: bool = False, + *sliders: list, ): + if not sliders: + sliders = [0] * 25 infer_pipeline = import_voice_converter() + additional_params = { + "reverb_room_size": sliders[0], + "reverb_damping": sliders[1], + "reverb_wet_level": sliders[2], + "reverb_dry_level": sliders[3], + "reverb_width": sliders[4], + "reverb_freeze_mode": sliders[5], + "pitch_shift_semitones": sliders[6], + "limiter_threshold": sliders[7], + "limiter_release": sliders[8], + "gain_db": sliders[9], + "distortion_gain": sliders[10], + "chorus_rate": sliders[11], + "chorus_depth": sliders[12], + "chorus_delay": sliders[13], + "chorus_feedback": 
sliders[14], + "chorus_mix": sliders[15], + "bitcrush_bit_depth": sliders[16], + "clipping_threshold": sliders[17], + "compressor_threshold": sliders[18], + "compressor_ratio": sliders[19], + "compressor_attack": sliders[20], + "compressor_release": sliders[21], + "delay_seconds": sliders[22], + "delay_feedback": sliders[23], + "delay_mix": sliders[24], + } infer_pipeline.convert_audio( pitch=pitch, filter_radius=filter_radius, @@ -96,6 +137,21 @@ def run_infer_script( f0_file=f0_file, embedder_model=embedder_model, embedder_model_custom=embedder_model_custom, + formant_shifting=formant_shifting, + formant_qfrency=formant_qfrency, + formant_timbre=formant_timbre, + post_process=post_process, + reverb=reverb, + pitch_shift=pitch_shift, + limiter=limiter, + gain=gain, + distortion=distortion, + chorus=chorus, + bitcrush=bitcrush, + clipping=clipping, + compressor=compressor, + delay=delay, + sliders=additional_params, ) return f"File {input_path} inferred successfully.", output_path.replace( ".wav", f".{export_format.lower()}" @@ -103,7 +159,6 @@ def run_infer_script( # Batch infer -@spaces.GPU(duration=200) def run_batch_infer_script( pitch: int, filter_radius: int, @@ -125,52 +180,99 @@ def run_batch_infer_script( f0_file: str, embedder_model: str, embedder_model_custom: str = None, + formant_shifting: bool = False, + formant_qfrency: float = 1.0, + formant_timbre: float = 1.0, + post_process: bool = False, + reverb: bool = False, + pitch_shift: bool = False, + limiter: bool = False, + gain: bool = False, + distortion: bool = False, + chorus: bool = False, + bitcrush: bool = False, + clipping: bool = False, + compressor: bool = False, + delay: bool = False, + *sliders: list, ): audio_files = [ f for f in os.listdir(input_folder) if f.endswith((".mp3", ".wav", ".flac")) ] print(f"Detected {len(audio_files)} audio files for inference.") - - for audio_file in audio_files: - if "_output" in audio_file: - pass - else: - input_path = os.path.join(input_folder, 
audio_file) - output_file_name = os.path.splitext(os.path.basename(audio_file))[0] - output_path = os.path.join( - output_folder, - f"{output_file_name}_output{os.path.splitext(audio_file)[1]}", - ) - infer_pipeline = import_voice_converter() - print(f"Inferring {input_path}...") - infer_pipeline.convert_audio( - pitch=pitch, - filter_radius=filter_radius, - index_rate=index_rate, - volume_envelope=volume_envelope, - protect=protect, - hop_length=hop_length, - f0_method=f0_method, - audio_input_path=input_path, - audio_output_path=output_path, - model_path=pth_path, - index_path=index_path, - split_audio=split_audio, - f0_autotune=f0_autotune, - clean_audio=clean_audio, - clean_strength=clean_strength, - export_format=export_format, - upscale_audio=upscale_audio, - f0_file=f0_file, - embedder_model=embedder_model, - embedder_model_custom=embedder_model_custom, - ) + if not sliders: + sliders = [0] * 25 + infer_pipeline = import_voice_converter() + additional_params = { + "reverb_room_size": sliders[0], + "reverb_damping": sliders[1], + "reverb_wet_level": sliders[2], + "reverb_dry_level": sliders[3], + "reverb_width": sliders[4], + "reverb_freeze_mode": sliders[5], + "pitch_shift_semitones": sliders[6], + "limiter_threshold": sliders[7], + "limiter_release": sliders[8], + "gain_db": sliders[9], + "distortion_gain": sliders[10], + "chorus_rate": sliders[11], + "chorus_depth": sliders[12], + "chorus_delay": sliders[13], + "chorus_feedback": sliders[14], + "chorus_mix": sliders[15], + "bitcrush_bit_depth": sliders[16], + "clipping_threshold": sliders[17], + "compressor_threshold": sliders[18], + "compressor_ratio": sliders[19], + "compressor_attack": sliders[20], + "compressor_release": sliders[21], + "delay_seconds": sliders[22], + "delay_feedback": sliders[23], + "delay_mix": sliders[24], + } + infer_pipeline.convert_audio_batch( + pitch=pitch, + filter_radius=filter_radius, + index_rate=index_rate, + volume_envelope=volume_envelope, + protect=protect, + 
hop_length=hop_length, + f0_method=f0_method, + audio_input_paths=input_folder, + audio_output_path=output_folder, + model_path=pth_path, + index_path=index_path, + split_audio=split_audio, + f0_autotune=f0_autotune, + clean_audio=clean_audio, + clean_strength=clean_strength, + export_format=export_format, + upscale_audio=upscale_audio, + f0_file=f0_file, + embedder_model=embedder_model, + embedder_model_custom=embedder_model_custom, + formant_shifting=formant_shifting, + formant_qfrency=formant_qfrency, + formant_timbre=formant_timbre, + pid_file_path=os.path.join(now_dir, "assets", "infer_pid.txt"), + post_process=post_process, + reverb=reverb, + pitch_shift=pitch_shift, + limiter=limiter, + gain=gain, + distortion=distortion, + chorus=chorus, + bitcrush=bitcrush, + clipping=clipping, + compressor=compressor, + delay=delay, + sliders=additional_params, + ) return f"Files from {input_folder} inferred successfully." # TTS -@spaces.GPU(duration=120) def run_tts_script( tts_text: str, tts_voice: str, @@ -238,6 +340,21 @@ def run_tts_script( f0_file=f0_file, embedder_model=embedder_model, embedder_model_custom=embedder_model_custom, + formant_shifting=None, + formant_qfrency=None, + formant_timbre=None, + post_process=None, + reverb=None, + pitch_shift=None, + limiter=None, + gain=None, + distortion=None, + chorus=None, + bitcrush=None, + clipping=None, + compressor=None, + delay=None, + sliders=None, ) return f"Text {tts_text} synthesized successfully.", output_rvc_path.replace( @@ -246,9 +363,13 @@ def run_tts_script( # Preprocess -@spaces.GPU(duration=360) def run_preprocess_script( - model_name: str, dataset_path: str, sample_rate: int, cpu_cores: int + model_name: str, + dataset_path: str, + sample_rate: int, + cpu_cores: int, + cut_preprocess: bool, + process_effects: bool, ): config = get_config() per = 3.0 if config.is_half else 3.7 @@ -264,16 +385,16 @@ def run_preprocess_script( sample_rate, per, cpu_cores, + cut_preprocess, + process_effects, ], ), ] - 
os.makedirs(os.path.join(logs_path, model_name), exist_ok=True) subprocess.run(command) return f"Model {model_name} preprocessed successfully." # Extract -@spaces.GPU(duration=360) def run_extract_script( model_name: str, rvc_version: str, @@ -286,16 +407,13 @@ def run_extract_script( embedder_model: str, embedder_model_custom: str = None, ): - config = get_config() + model_path = os.path.join(logs_path, model_name) - pitch_extractor = os.path.join("rvc", "train", "extract", "pitch_extractor.py") - embedding_extractor = os.path.join( - "rvc", "train", "extract", "embedding_extractor.py" - ) + extract = os.path.join("rvc", "train", "extract", "extract.py") command_1 = [ python, - pitch_extractor, + extract, *map( str, [ @@ -304,34 +422,21 @@ def run_extract_script( hop_length, cpu_cores, gpu, - ], - ), - ] - - command_2 = [ - python, - embedding_extractor, - *map( - str, - [ - model_path, rvc_version, - gpu, + pitch_guidance, + sample_rate, embedder_model, embedder_model_custom, ], ), ] + subprocess.run(command_1) - subprocess.run(command_2) - generate_config(rvc_version, sample_rate, model_path) - generate_filelist(pitch_guidance, model_path, rvc_version, sample_rate) return f"Model {model_name} extracted successfully." # Train -@spaces.GPU(duration=360) def run_train_script( model_name: str, rvc_version: str, @@ -347,7 +452,8 @@ def run_train_script( overtraining_threshold: int, pretrained: bool, sync_graph: bool, - cache_data_in_gpu: bool, + index_algorithm: str = "Auto", + cache_data_in_gpu: bool = False, custom_pretrained: bool = False, g_pretrained_path: str = None, d_pretrained_path: str = None, @@ -396,19 +502,19 @@ def run_train_script( ), ] subprocess.run(command) - run_index_script(model_name, rvc_version) + run_index_script(model_name, rvc_version, index_algorithm) return f"Model {model_name} trained successfully." 
# Index -@spaces.GPU -def run_index_script(model_name: str, rvc_version: str): +def run_index_script(model_name: str, rvc_version: str, index_algorithm: str): index_script_path = os.path.join("rvc", "train", "process", "extract_index.py") command = [ python, index_script_path, os.path.join(logs_path, model_name), rvc_version, + index_algorithm, ] subprocess.run(command) @@ -416,7 +522,6 @@ def run_index_script(model_name: str, rvc_version: str): # Model extract -@spaces.GPU def run_model_extract_script( pth_path: str, model_name: str, @@ -433,9 +538,9 @@ def run_model_extract_script( # Model information -@spaces.GPU def run_model_information_script(pth_path: str): print(model_information(pth_path)) + return model_information(pth_path) # Model blender @@ -447,7 +552,6 @@ def run_model_blender_script( # Tensorboard -@spaces.GPU def run_tensorboard_script(): launch_tensorboard_pipeline() @@ -478,6 +582,19 @@ def run_audio_analyzer_script( return audio_info, plot_path +def run_model_author_script(model_author: str): + with open(os.path.join(now_dir, "assets", "config.json"), "r") as f: + config = json.load(f) + + config["model_author"] = model_author + + with open(os.path.join(now_dir, "assets", "config.json"), "w") as f: + json.dump(config, f, indent=4) + + print(f"Model author set to {model_author}.") + return f"Model author set to {model_author}." + + # API def run_api_script(ip: str, port: int): command = [ @@ -638,8 +755,9 @@ def parse_arguments(): help=embedder_model_description, choices=[ "contentvec", + "chinese-hubert-base", "japanese-hubert-base", - "chinese-hubert-large", + "korean-hubert-base", "custom", ], default="contentvec", @@ -666,6 +784,31 @@ def parse_arguments(): help=f0_file_description, default=None, ) + formant_shifting_description = "Apply formant shifting to the input audio. This can help adjust the timbre of the voice." 
+ infer_parser.add_argument( + "--formant_shifting", + type=lambda x: bool(strtobool(x)), + choices=[True, False], + help=formant_shifting_description, + default=False, + required=False, + ) + formant_qfrency_description = "Control the frequency of the formant shifting effect. Higher values result in a more pronounced effect." + infer_parser.add_argument( + "--formant_qfrency", + type=float, + help=formant_qfrency_description, + default=1.0, + required=False, + ) + formant_timbre_description = "Control the timbre of the formant shifting effect. Higher values result in a more pronounced effect." + infer_parser.add_argument( + "--formant_timbre", + type=float, + help=formant_timbre_description, + default=1.0, + required=False, + ) # Parser for 'batch_infer' mode batch_infer_parser = subparsers.add_parser( @@ -789,8 +932,9 @@ def parse_arguments(): help=embedder_model_description, choices=[ "contentvec", + "chinese-hubert-base", "japanese-hubert-base", - "chinese-hubert-large", + "korean-hubert-base", "custom", ], default="contentvec", @@ -814,6 +958,28 @@ def parse_arguments(): help=f0_file_description, default=None, ) + batch_infer_parser.add_argument( + "--formant_shifting", + type=lambda x: bool(strtobool(x)), + choices=[True, False], + help=formant_shifting_description, + default=False, + required=False, + ) + batch_infer_parser.add_argument( + "--formant_qfrency", + type=float, + help=formant_qfrency_description, + default=1.0, + required=False, + ) + batch_infer_parser.add_argument( + "--formant_timbre", + type=float, + help=formant_timbre_description, + default=1.0, + required=False, + ) # Parser for 'tts' mode tts_parser = subparsers.add_parser("tts", help="Run TTS inference") @@ -951,8 +1117,9 @@ def parse_arguments(): help=embedder_model_description, choices=[ "contentvec", + "chinese-hubert-base", "japanese-hubert-base", - "chinese-hubert-large", + "korean-hubert-base", "custom", ], default="contentvec", @@ -1000,6 +1167,22 @@ def parse_arguments(): 
help="Number of CPU cores to use for preprocessing.", choices=range(1, 65), ) + preprocess_parser.add_argument( + "--cut_preprocess", + type=lambda x: bool(strtobool(x)), + choices=[True, False], + help="Cut the dataset into smaller segments for faster preprocessing.", + default=True, + required=False, + ) + preprocess_parser.add_argument( + "--process_effects", + type=lambda x: bool(strtobool(x)), + choices=[True, False], + help="Disable all filters during preprocessing.", + default=False, + required=False, + ) # Parser for 'extract' mode extract_parser = subparsers.add_parser( @@ -1066,8 +1249,9 @@ def parse_arguments(): help=embedder_model_description, choices=[ "contentvec", + "chinese-hubert-base", "japanese-hubert-base", - "chinese-hubert-large", + "korean-hubert-base", "custom", ], default="contentvec", @@ -1202,6 +1386,14 @@ def parse_arguments(): help="Cache training data in GPU memory.", default=False, ) + train_parser.add_argument( + "--index_algorithm", + type=str, + choices=["Auto", "Faiss", "KMeans"], + help="Choose the method for generating the index file.", + default="Auto", + required=False, + ) # Parser for 'index' mode index_parser = subparsers.add_parser( @@ -1217,6 +1409,14 @@ def parse_arguments(): choices=["v1", "v2"], default="v2", ) + index_parser.add_argument( + "--index_algorithm", + type=str, + choices=["Auto", "Faiss", "KMeans"], + help="Choose the method for generating the index file.", + default="Auto", + required=False, + ) # Parser for 'model_extract' mode model_extract_parser = subparsers.add_parser( @@ -1450,6 +1650,8 @@ def main(): dataset_path=args.dataset_path, sample_rate=args.sample_rate, cpu_cores=args.cpu_cores, + cut_preprocess=args.cut_preprocess, + process_effects=args.process_effects, ) elif args.mode == "extract": run_extract_script( @@ -1481,6 +1683,7 @@ def main(): pretrained=args.pretrained, custom_pretrained=args.custom_pretrained, sync_graph=args.sync_graph, + index_algorithm=args.index_algorithm, 
cache_data_in_gpu=args.cache_data_in_gpu, g_pretrained_path=args.g_pretrained_path, d_pretrained_path=args.d_pretrained_path, @@ -1489,6 +1692,7 @@ def main(): run_index_script( model_name=args.model_name, rvc_version=args.rvc_version, + index_algorithm=args.index_algorithm, ) elif args.mode == "model_extract": run_model_extract_script( diff --git a/logs/mute/f0/mute.wav.npy b/logs/mute/f0/mute.wav.npy deleted file mode 100644 index dd7e9afd2e7f2aefaa30bcd4541a23ce96a9e150..0000000000000000000000000000000000000000 --- a/logs/mute/f0/mute.wav.npy +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:9b9acf9ab7facdb032e1d687fe35182670b0b94566c4b209ae48c239d19956a6 -size 1332 diff --git a/logs/mute/f0_voiced/mute.wav.npy b/logs/mute/f0_voiced/mute.wav.npy deleted file mode 100644 index 7644e325ddd34bd186153ecf7461aa1593a054f3..0000000000000000000000000000000000000000 --- a/logs/mute/f0_voiced/mute.wav.npy +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:30792849c8e72d67e6691754077f2888b101cb741e9c7f193c91dd9692870c87 -size 2536 diff --git a/logs/mute/sliced_audios/mute32000.wav b/logs/mute/sliced_audios/mute32000.wav deleted file mode 100644 index b4b5029205bf72dee5856bbe0c65c34337dc8dd4..0000000000000000000000000000000000000000 Binary files a/logs/mute/sliced_audios/mute32000.wav and /dev/null differ diff --git a/logs/mute/sliced_audios/mute40000.wav b/logs/mute/sliced_audios/mute40000.wav deleted file mode 100644 index fcf1281d4d1b01417a4d6738022f4402594a6802..0000000000000000000000000000000000000000 Binary files a/logs/mute/sliced_audios/mute40000.wav and /dev/null differ diff --git a/logs/mute/sliced_audios/mute48000.wav b/logs/mute/sliced_audios/mute48000.wav deleted file mode 100644 index 72822a01251e77d7d2a4a7da9d94805426829083..0000000000000000000000000000000000000000 Binary files a/logs/mute/sliced_audios/mute48000.wav and /dev/null differ diff --git a/logs/mute/sliced_audios_16k/mute.wav 
b/logs/mute/sliced_audios_16k/mute.wav deleted file mode 100644 index 27a7d638558539c521aacf8c0f34bd0d4816aa9d..0000000000000000000000000000000000000000 Binary files a/logs/mute/sliced_audios_16k/mute.wav and /dev/null differ diff --git a/logs/mute/v1_extracted/mute.npy b/logs/mute/v1_extracted/mute.npy deleted file mode 100644 index c57ae95d19d969788ef186a81cdc2f4b462ed6df..0000000000000000000000000000000000000000 --- a/logs/mute/v1_extracted/mute.npy +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:64d5abbac078e19a3f649c0d78a02cb33a71407ded3ddf2db78e6b803d0c0126 -size 152704 diff --git a/logs/mute/v2_extracted/mute.npy b/logs/mute/v2_extracted/mute.npy deleted file mode 100644 index ea5f9dddca08ff210791b27e3db3fc5676eabc90..0000000000000000000000000000000000000000 --- a/logs/mute/v2_extracted/mute.npy +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:16ef62b957887ac9f0913aa5158f18983afff1ef5a3e4c5fd067ac20fc380d54 -size 457856 diff --git a/requirements.txt b/requirements.txt deleted file mode 100644 index 5d31f75124bb2930e603df675967023061068351..0000000000000000000000000000000000000000 --- a/requirements.txt +++ /dev/null @@ -1,49 +0,0 @@ -# General dependencies -pip==23.3; sys_platform == 'darwin' -wheel; sys_platform == 'darwin' -PyYAML; sys_platform == 'darwin' -ffmpeg-python>=0.2.0 -numpy==1.23.5 -requests==2.32.0 -tqdm -wget - -# Audio processing -faiss-cpu==1.7.3 -librosa==0.9.2 -pyworld==0.3.4 -scipy==1.11.1 -soundfile==0.12.1 -praat-parselmouth -noisereduce -audio_upscaler==0.1.4 - -# Machine learning -omegaconf==2.0.5; sys_platform == 'darwin' -git+https://github.com/jeroenbnn/fairseqApplio; sys_platform == 'linux' -fairseq==0.12.2; sys_platform == 'darwin' or sys_platform == 'win32' -numba; sys_platform == 'linux' -numba==0.57.0; sys_platform == 'darwin' or sys_platform == 'win32' -torchaudio==2.1.1 -torch==2.1.1 -torchcrepe==0.0.23 -torchvision==0.16.1 -einops -libf0 -torchfcpe - 
-# Visualization -matplotlib==3.7.2 -tensorboard -gradio==4.36.0 - -# Miscellaneous -certifi==2024.7.4; sys_platform == 'darwin' -antlr4-python3-runtime==4.8; sys_platform == 'darwin' -ffmpy==0.3.1 -tensorboardX -edge-tts==6.1.9 -pypresence -beautifulsoup4 -flask -local-attention diff --git a/run-applio.bat b/run-applio.bat new file mode 100644 index 0000000000000000000000000000000000000000..86de62e1856e3912c320ddd5f77bbf090ed21b56 --- /dev/null +++ b/run-applio.bat @@ -0,0 +1,13 @@ +@echo off +setlocal +title Applio + +if not exist env ( + echo Please run 'run-install.bat' first to set up the environment. + pause + exit /b 1 +) + +env\python.exe app.py --open +echo. +pause \ No newline at end of file diff --git a/run-install.bat b/run-install.bat new file mode 100644 index 0000000000000000000000000000000000000000..3b3a6a2dc3dd6ba1ddbdd997953c6e5722188f1b --- /dev/null +++ b/run-install.bat @@ -0,0 +1,76 @@ +@echo off +setlocal enabledelayedexpansion +title Applio Installer + +echo Welcome to the Applio Installer! +echo. + +set "principal=%cd%" +set "CONDA_ROOT_PREFIX=%UserProfile%\Miniconda3" +set "INSTALL_ENV_DIR=%principal%\env" +set "MINICONDA_DOWNLOAD_URL=https://repo.anaconda.com/miniconda/Miniconda3-py39_23.9.0-0-Windows-x86_64.exe" +set "CONDA_EXECUTABLE=%CONDA_ROOT_PREFIX%\Scripts\conda.exe" + +echo Cleaning up unnecessary files... +for %%F in (Makefile Dockerfile docker-compose.yaml *.sh) do ( + if exist "%%F" del "%%F" +) +echo Cleanup complete. +echo. + +if not exist "%CONDA_EXECUTABLE%" ( + echo Miniconda not found. Starting download and installation... + echo Downloading Miniconda... + powershell -Command "& {Invoke-WebRequest -Uri '%MINICONDA_DOWNLOAD_URL%' -OutFile 'miniconda.exe'}" + if not exist "miniconda.exe" ( + echo Download failed. Please check your internet connection and try again. + goto :error + ) + + echo Installing Miniconda... 
+ start /wait "" miniconda.exe /InstallationType=JustMe /RegisterPython=0 /S /D=%CONDA_ROOT_PREFIX% + if errorlevel 1 ( + echo Miniconda installation failed. + goto :error + ) + del miniconda.exe + echo Miniconda installation complete. +) else ( + echo Miniconda already installed. Skipping installation. +) +echo. + +echo Creating Conda environment... +call "%CONDA_ROOT_PREFIX%\_conda.exe" create --no-shortcuts -y -k --prefix "%INSTALL_ENV_DIR%" python=3.9 +if errorlevel 1 goto :error +echo Conda environment created successfully. +echo. + +if exist "%INSTALL_ENV_DIR%\python.exe" ( + echo Installing specific pip version... + "%INSTALL_ENV_DIR%\python.exe" -m pip install "pip<24.1" + if errorlevel 1 goto :error + echo Pip installation complete. + echo. +) + +echo Installing dependencies... +call "%CONDA_ROOT_PREFIX%\condabin\conda.bat" activate "%INSTALL_ENV_DIR%" || goto :error +pip install --upgrade setuptools || goto :error +pip install -r "%principal%\requirements.txt" || goto :error +pip uninstall torch torchvision torchaudio -y +pip install torch==2.1.1 torchvision==0.16.1 torchaudio==2.1.1 --index-url https://download.pytorch.org/whl/cu121 || goto :error +call "%CONDA_ROOT_PREFIX%\condabin\conda.bat" deactivate +echo Dependencies installation complete. +echo + +echo Applio has been installed successfully! +echo To start Applio, please run 'run-applio.bat'. +echo. +pause +exit /b 0 + +:error +echo An error occurred during installation. Please check the output above for details. +pause +exit /b 1 \ No newline at end of file diff --git a/run-install.sh b/run-install.sh old mode 100755 new mode 100644 index f07e4231d23a03c097dc300f92f3d38afe9f3ea9..563df03ae9de2be6aeb3bbed85fa2862cf938da8 --- a/run-install.sh +++ b/run-install.sh @@ -32,12 +32,10 @@ prepare_install() { exit 1 fi fi - $py -m venv .venv . 
.venv/bin/activate - python -m ensurepip - # Update pip within the virtual environment - pip3 install --upgrade pip + echo "Installing pip version less than 24.1..." + python -m pip install "pip<24.1" echo echo "Installing Applio dependencies..." python -m pip install -r requirements.txt @@ -84,4 +82,4 @@ elif [ "$(uname)" != "Linux" ]; then exit 1 fi -prepare_install \ No newline at end of file +prepare_install diff --git a/run-tensorboard.bat b/run-tensorboard.bat new file mode 100644 index 0000000000000000000000000000000000000000..1817dc561d8eff3e20136bbfb1bd6e32e82b00ef --- /dev/null +++ b/run-tensorboard.bat @@ -0,0 +1,6 @@ +@echo off +setlocal +title Tensorboard + +env\python.exe core.py tensorboard +pause \ No newline at end of file diff --git a/rvc/configs/config.py b/rvc/configs/config.py index 07bd818d2005d1f51dcd5669955b8fe21231ae0a..51eb0e2bb72063101359b5d920455c79b48e4e6c 100644 --- a/rvc/configs/config.py +++ b/rvc/configs/config.py @@ -131,6 +131,13 @@ class Config: def set_cuda_config(self): i_device = int(self.device.split(":")[-1]) self.gpu_name = torch.cuda.get_device_name(i_device) + # Zluda + if self.gpu_name.endswith("[ZLUDA]"): + print("Zluda compatibility enabled, experimental feature.") + torch.backends.cudnn.enabled = False + torch.backends.cuda.enable_flash_sdp(False) + torch.backends.cuda.enable_math_sdp(True) + torch.backends.cuda.enable_mem_efficient_sdp(False) low_end_gpus = ["16", "P40", "P10", "1060", "1070", "1080"] if ( any(gpu in self.gpu_name for gpu in low_end_gpus) @@ -163,9 +170,17 @@ def get_gpu_info(): torch.cuda.get_device_properties(i).total_memory / 1024 / 1024 / 1024 + 0.4 ) - gpu_infos.append("%s: %s %s GB" % (i, gpu_name, mem)) + gpu_infos.append(f"{i}: {gpu_name} ({mem} GB)") if len(gpu_infos) > 0: gpu_info = "\n".join(gpu_infos) else: gpu_info = "Unfortunately, there is no compatible GPU available to support your training." 
return gpu_info + + +def get_number_of_gpus(): + if torch.cuda.is_available(): + num_gpus = torch.cuda.device_count() + return "-".join(map(str, range(num_gpus))) + else: + return "-" diff --git a/rvc/infer/infer.py b/rvc/infer/infer.py index a943f9bad5ccb769965bd7ea1d330bb8948b2dc5..56cfd6a842836820b73439ceef72b9f20b480e80 100644 --- a/rvc/infer/infer.py +++ b/rvc/infer/infer.py @@ -8,6 +8,19 @@ import traceback import numpy as np import soundfile as sf import noisereduce as nr +from pedalboard import ( + Pedalboard, + Chorus, + Distortion, + Reverb, + PitchShift, + Limiter, + Gain, + Bitcrush, + Clipping, + Compressor, + Delay, +) from scipy.io import wavfile from audio_upscaler import upscale @@ -16,7 +29,7 @@ now_dir = os.getcwd() sys.path.append(now_dir) from rvc.infer.pipeline import Pipeline as VC -from rvc.lib.utils import load_audio, load_embedding +from rvc.lib.utils import load_audio_infer, load_embedding from rvc.lib.tools.split_audio import process_audio, merge_audio from rvc.lib.algorithm.synthesizers import Synthesizer from rvc.configs.config import Config @@ -24,6 +37,7 @@ from rvc.configs.config import Config logging.getLogger("httpx").setLevel(logging.WARNING) logging.getLogger("httpcore").setLevel(logging.WARNING) logging.getLogger("faiss").setLevel(logging.WARNING) +logging.getLogger("faiss.loader").setLevel(logging.WARNING) class VoiceConverter: @@ -39,6 +53,7 @@ class VoiceConverter: self.hubert_model = ( None # Initialize the Hubert model (for embedding extraction) ) + self.last_embedder_model = None # Last used embedder model self.tgt_sr = None # Target sampling rate for the output audio self.net_g = None # Generator network for voice conversion self.vc = None # Voice conversion pipeline instance @@ -55,8 +70,8 @@ class VoiceConverter: embedder_model (str): Path to the pre-trained HuBERT model. embedder_model_custom (str): Path to the custom HuBERT model. 
""" - models, _, _ = load_embedding(embedder_model, embedder_model_custom) - self.hubert_model = models[0].to(self.config.device) + self.hubert_model = load_embedding(embedder_model, embedder_model_custom) + self.hubert_model.to(self.config.device) self.hubert_model = ( self.hubert_model.half() if self.config.is_half @@ -117,6 +132,107 @@ class VoiceConverter: except Exception as error: print(f"An error occurred converting the audio format: {error}") + @staticmethod + def post_process_audio( + audio_input, + sample_rate, + reverb: bool, + reverb_room_size: float, + reverb_damping: float, + reverb_wet_level: float, + reverb_dry_level: float, + reverb_width: float, + reverb_freeze_mode: float, + pitch_shift: bool, + pitch_shift_semitones: int, + limiter: bool, + limiter_threshold: float, + limiter_release: float, + gain: bool, + gain_db: float, + distortion: bool, + distortion_gain: float, + chorus: bool, + chorus_rate: float, + chorus_depth: float, + chorus_delay: float, + chorus_feedback: float, + chorus_mix: float, + bitcrush: bool, + bitcrush_bit_depth: int, + clipping: bool, + clipping_threshold: float, + compressor: bool, + compressor_threshold: float, + compressor_ratio: float, + compressor_attack: float, + compressor_release: float, + delay: bool, + delay_seconds: float, + delay_feedback: float, + delay_mix: float, + audio_output_path: str, + ): + board = Pedalboard() + if reverb: + reverb = Reverb( + room_size=reverb_room_size, + damping=reverb_damping, + wet_level=reverb_wet_level, + dry_level=reverb_dry_level, + width=reverb_width, + freeze_mode=reverb_freeze_mode, + ) + board.append(reverb) + if pitch_shift: + pitch_shift = PitchShift(semitones=pitch_shift_semitones) + board.append(pitch_shift) + if limiter: + limiter = Limiter( + threshold_db=limiter_threshold, release_ms=limiter_release + ) + board.append(limiter) + if gain: + gain = Gain(gain_db=gain_db) + board.append(gain) + if distortion: + distortion = Distortion(drive_db=distortion_gain) + 
board.append(distortion) + if chorus: + chorus = Chorus( + rate_hz=chorus_rate, + depth=chorus_depth, + centre_delay_ms=chorus_delay, + feedback=chorus_feedback, + mix=chorus_mix, + ) + board.append(chorus) + if bitcrush: + bitcrush = Bitcrush(bit_depth=bitcrush_bit_depth) + board.append(bitcrush) + if clipping: + clipping = Clipping(threshold_db=clipping_threshold) + board.append(clipping) + if compressor: + compressor = Compressor( + threshold_db=compressor_threshold, + ratio=compressor_ratio, + attack_ms=compressor_attack, + release_ms=compressor_release, + ) + board.append(compressor) + if delay: + delay = Delay( + delay_seconds=delay_seconds, + feedback=delay_feedback, + mix=delay_mix, + ) + board.append(delay) + audio_input, sample_rate = librosa.load(audio_input, sr=sample_rate) + output = board(audio_input, sample_rate) + sf.write(audio_output_path, output, sample_rate, format="WAV") + return audio_output_path + def convert_audio( self, audio_input_path: str, @@ -139,6 +255,21 @@ class VoiceConverter: clean_strength: float, export_format: str, upscale_audio: bool, + formant_shifting: bool, + formant_qfrency: float, + formant_timbre: float, + post_process: bool, + reverb: bool, + pitch_shift: bool, + limiter: bool, + gain: bool, + distortion: bool, + chorus: bool, + bitcrush: bool, + clipping: bool, + compressor: bool, + delay: bool, + sliders: dict, resample_sr: int = 0, sid: int = 0, ): @@ -168,7 +299,20 @@ class VoiceConverter: clean_strength (float, optional): Strength of the audio cleaning. Default is 0.7. export_format (str, optional): Format for exporting the audio. Default is "WAV". upscale_audio (bool, optional): Whether to upscale the audio. Default is False. - + formant_shift (bool, optional): Whether to shift the formants. Default is False. + formant_qfrency (float, optional): Formant frequency. Default is 1.0. + formant_timbre (float, optional): Formant timbre. Default is 1.0. + reverb (bool, optional): Whether to apply reverb. Default is False. 
+ pitch_shift (bool, optional): Whether to apply pitch shift. Default is False. + limiter (bool, optional): Whether to apply a limiter. Default is False. + gain (bool, optional): Whether to apply gain. Default is False. + distortion (bool, optional): Whether to apply distortion. Default is False. + chorus (bool, optional): Whether to apply chorus. Default is False. + bitcrush (bool, optional): Whether to apply bitcrush. Default is False. + clipping (bool, optional): Whether to apply clipping. Default is False. + compressor (bool, optional): Whether to apply a compressor. Default is False. + delay (bool, optional): Whether to apply delay. Default is False. + sliders (dict, optional): Dictionary of effect parameters. Default is None. """ self.get_vc(model_path, sid) @@ -178,15 +322,21 @@ class VoiceConverter: if upscale_audio == True: upscale(audio_input_path, audio_input_path) - - audio = load_audio(audio_input_path, 16000) + audio = load_audio_infer( + audio_input_path, + 16000, + formant_shifting, + formant_qfrency, + formant_timbre, + ) audio_max = np.abs(audio).max() / 0.95 if audio_max > 1: audio /= audio_max - if not self.hubert_model: + if not self.hubert_model or embedder_model != self.last_embedder_model: self.load_hubert(embedder_model, embedder_model_custom) + self.last_embedder_model = embedder_model file_index = ( index_path.strip() @@ -240,6 +390,21 @@ class VoiceConverter: embedder_model_custom=embedder_model_custom, clean_audio=clean_audio, clean_strength=clean_strength, + formant_shifting=formant_shifting, + formant_qfrency=formant_qfrency, + formant_timbre=formant_timbre, + post_process=post_process, + reverb=reverb, + pitch_shift=pitch_shift, + limiter=limiter, + gain=gain, + distortion=distortion, + chorus=chorus, + bitcrush=bitcrush, + clipping=clipping, + compressor=compressor, + delay=delay, + sliders=sliders, ) except Exception as error: print(f"An error occurred processing the segmented audio: {error}") @@ -252,6 +417,48 @@ class 
VoiceConverter: ) self.tgt_sr, audio_opt = merge_audio(merge_timestamps_file) os.remove(merge_timestamps_file) + if post_process: + audio_opt = self.post_process_audio( + audio_input=audio_opt, + sample_rate=self.tgt_sr, + reverb=reverb, + reverb_room_size=sliders[0], + reverb_damping=sliders[1], + reverb_wet_level=sliders[2], + reverb_dry_level=sliders[3], + reverb_width=sliders[4], + reverb_freeze_mode=sliders[5], + pitch_shift=pitch_shift, + pitch_shift_semitones=sliders[6], + limiter=limiter, + limiter_threshold=sliders[7], + limiter_release=sliders[8], + gain=gain, + gain_db=sliders[9], + distortion=distortion, + distortion_gain=sliders[10], + chorus=chorus, + chorus_rate=sliders[11], + chorus_depth=sliders[12], + chorus_delay=sliders[13], + chorus_feedback=sliders[14], + chorus_mix=sliders[15], + bitcrush=bitcrush, + bitcrush_bit_depth=sliders[16], + clipping=clipping, + clipping_threshold=sliders[17], + compressor=compressor, + compressor_threshold=sliders[18], + compressor_ratio=sliders[19], + compressor_attack=sliders[20], + compressor_release=sliders[21], + delay=delay, + delay_seconds=sliders[22], + delay_feedback=sliders[23], + delay_mix=sliders[24], + audio_output_path=audio_output_path, + ) + sf.write(audio_output_path, audio_opt, self.tgt_sr, format="WAV") else: audio_opt = self.vc.pipeline( model=self.hubert_model, @@ -286,7 +493,47 @@ class VoiceConverter: sf.write( audio_output_path, cleaned_audio, self.tgt_sr, format="WAV" ) - + if post_process: + audio_output_path = self.post_process_audio( + audio_input=audio_output_path, + sample_rate=self.tgt_sr, + reverb=reverb, + reverb_room_size=sliders["reverb_room_size"], + reverb_damping=sliders["reverb_damping"], + reverb_wet_level=sliders["reverb_wet_level"], + reverb_dry_level=sliders["reverb_dry_level"], + reverb_width=sliders["reverb_width"], + reverb_freeze_mode=sliders["reverb_freeze_mode"], + pitch_shift=pitch_shift, + pitch_shift_semitones=sliders["pitch_shift_semitones"], + limiter=limiter, + 
limiter_threshold=sliders["limiter_threshold"], + limiter_release=sliders["limiter_release"], + gain=gain, + gain_db=sliders["gain_db"], + distortion=distortion, + distortion_gain=sliders["distortion_gain"], + chorus=chorus, + chorus_rate=sliders["chorus_rate"], + chorus_depth=sliders["chorus_depth"], + chorus_delay=sliders["chorus_delay"], + chorus_feedback=sliders["chorus_feedback"], + chorus_mix=sliders["chorus_mix"], + bitcrush=bitcrush, + bitcrush_bit_depth=sliders["bitcrush_bit_depth"], + clipping=clipping, + clipping_threshold=sliders["clipping_threshold"], + compressor=compressor, + compressor_threshold=sliders["compressor_threshold"], + compressor_ratio=sliders["compressor_ratio"], + compressor_attack=sliders["compressor_attack"], + compressor_release=sliders["compressor_release"], + delay=delay, + delay_seconds=sliders["delay_seconds"], + delay_feedback=sliders["delay_feedback"], + delay_mix=sliders["delay_mix"], + audio_output_path=audio_output_path, + ) output_path_format = audio_output_path.replace( ".wav", f".{export_format.lower()}" ) @@ -303,6 +550,344 @@ class VoiceConverter: print(f"An error occurred during audio conversion: {error}") print(traceback.format_exc()) + def convert_audio_batch( + self, + audio_input_paths: str, + audio_output_path: str, + model_path: str, + index_path: str, + embedder_model: str, + pitch: int, + f0_file: str, + f0_method: str, + index_rate: float, + volume_envelope: int, + protect: float, + hop_length: int, + split_audio: bool, + f0_autotune: bool, + filter_radius: int, + embedder_model_custom: str, + clean_audio: bool, + clean_strength: float, + export_format: str, + upscale_audio: bool, + formant_shifting: bool, + formant_qfrency: float, + formant_timbre: float, + resample_sr: int = 0, + sid: int = 0, + pid_file_path: str = None, + post_process: bool = False, + reverb: bool = False, + pitch_shift: bool = False, + limiter: bool = False, + gain: bool = False, + distortion: bool = False, + chorus: bool = False, + 
bitcrush: bool = False, + clipping: bool = False, + compressor: bool = False, + delay: bool = False, + sliders: dict = None, + ): + """ + Performs voice conversion on a batch of input audio files. + + Args: + audio_input_paths (list): List of paths to the input audio files. + audio_output_path (str): Path to the output audio file. + model_path (str): Path to the voice conversion model. + index_path (str): Path to the index file. + sid (int, optional): Speaker ID. Default is 0. + pitch (str, optional): Key for F0 up-sampling. Default is None. + f0_file (str, optional): Path to the F0 file. Default is None. + f0_method (str, optional): Method for F0 extraction. Default is None. + index_rate (float, optional): Rate for index matching. Default is None. + resample_sr (int, optional): Resample sampling rate. Default is 0. + volume_envelope (float, optional): RMS mix rate. Default is None. + protect (float, optional): Protection rate for certain audio segments. Default is None. + hop_length (int, optional): Hop length for audio processing. Default is None. + split_audio (bool, optional): Whether to split the audio for processing. Default is False. + f0_autotune (bool, optional): Whether to use F0 autotune. Default is False. + filter_radius (int, optional): Radius for filtering. Default is None. + embedder_model (str, optional): Path to the embedder model. Default is None. + embedder_model_custom (str, optional): Path to the custom embedder model. Default is None. + clean_audio (bool, optional): Whether to clean the audio. Default is False. + clean_strength (float, optional): Strength of the audio cleaning. Default is 0.7. + export_format (str, optional): Format for exporting the audio. Default is "WAV". + upscale_audio (bool, optional): Whether to upscale the audio. Default is False. + formant_shift (bool, optional): Whether to shift the formants. Default is False. + formant_qfrency (float, optional): Formant frequency. Default is 1.0. 
+ formant_timbre (float, optional): Formant timbre. Default is 1.0. + pid_file_path (str, optional): Path to the PID file. Default is None. + post_process (bool, optional): Whether to apply post-processing effects. Default is False. + reverb (bool, optional): Whether to apply reverb. Default is False. + pitch_shift (bool, optional): Whether to apply pitch shift. Default is False. + limiter (bool, optional): Whether to apply a limiter. Default is False. + gain (bool, optional): Whether to apply gain. Default is False. + distortion (bool, optional): Whether to apply distortion. Default is False. + chorus (bool, optional): Whether to apply chorus. Default is False. + bitcrush (bool, optional): Whether to apply bitcrush. Default is False. + clipping (bool, optional): Whether to apply clipping. Default is False. + compressor (bool, optional): Whether to apply a compressor. Default is False. + delay (bool, optional): Whether to apply delay. Default is False. + sliders (dict, optional): Dictionary of effect parameters. Default is None. 
+ + """ + pid = os.getpid() + with open(pid_file_path, "w") as pid_file: + pid_file.write(str(pid)) + try: + if not self.hubert_model or embedder_model != self.last_embedder_model: + self.load_hubert(embedder_model, embedder_model_custom) + self.last_embedder_model = embedder_model + self.get_vc(model_path, sid) + file_index = ( + index_path.strip() + .strip('"') + .strip("\n") + .strip('"') + .strip() + .replace("trained", "added") + ) + start_time = time.time() + print(f"Converting audio batch '{audio_input_paths}'...") + audio_files = [ + f + for f in os.listdir(audio_input_paths) + if f.endswith((".mp3", ".wav", ".flac", ".m4a", ".ogg", ".opus")) + ] + print(f"Detected {len(audio_files)} audio files for inference.") + for i, audio_input_path in enumerate(audio_files): + audio_output_paths = os.path.join( + audio_output_path, + f"{os.path.splitext(os.path.basename(audio_input_path))[0]}_output.{export_format.lower()}", + ) + if os.path.exists(audio_output_paths): + continue + print(f"Converting audio '{audio_input_path}'...") + audio_input_path = os.path.join(audio_input_paths, audio_input_path) + + if upscale_audio == True: + upscale(audio_input_path, audio_input_path) + audio = load_audio_infer( + audio_input_path, + 16000, + formant_shifting, + formant_qfrency, + formant_timbre, + ) + audio_max = np.abs(audio).max() / 0.95 + + if audio_max > 1: + audio /= audio_max + + if self.tgt_sr != resample_sr >= 16000: + self.tgt_sr = resample_sr + + if split_audio: + result, new_dir_path = process_audio(audio_input_path) + if result == "Error": + return "Error with Split Audio", None + + dir_path = ( + new_dir_path.strip().strip('"').strip("\n").strip('"').strip() + ) + if dir_path: + paths = [ + os.path.join(root, name) + for root, _, files in os.walk(dir_path, topdown=False) + for name in files + if name.endswith(".wav") and root == dir_path + ] + try: + for path in paths: + self.convert_audio( + audio_input_path=path, + audio_output_path=path, + 
model_path=model_path, + index_path=index_path, + sid=sid, + pitch=pitch, + f0_file=None, + f0_method=f0_method, + index_rate=index_rate, + resample_sr=resample_sr, + volume_envelope=volume_envelope, + protect=protect, + hop_length=hop_length, + split_audio=False, + f0_autotune=f0_autotune, + filter_radius=filter_radius, + export_format=export_format, + upscale_audio=upscale_audio, + embedder_model=embedder_model, + embedder_model_custom=embedder_model_custom, + clean_audio=clean_audio, + clean_strength=clean_strength, + formant_shifting=formant_shifting, + formant_qfrency=formant_qfrency, + formant_timbre=formant_timbre, + post_process=post_process, + reverb=reverb, + pitch_shift=pitch_shift, + limiter=limiter, + gain=gain, + distortion=distortion, + chorus=chorus, + bitcrush=bitcrush, + clipping=clipping, + compressor=compressor, + delay=delay, + sliders=sliders, + ) + except Exception as error: + print( + f"An error occurred processing the segmented audio: {error}" + ) + print(traceback.format_exc()) + return f"Error {error}" + print("Finished processing segmented audio, now merging audio...") + merge_timestamps_file = os.path.join( + os.path.dirname(new_dir_path), + f"{os.path.basename(audio_input_path).split('.')[0]}_timestamps.txt", + ) + self.tgt_sr, audio_opt = merge_audio(merge_timestamps_file) + os.remove(merge_timestamps_file) + if post_process: + audio_opt = self.post_process_audio( + audio_input=audio_opt, + sample_rate=self.tgt_sr, + reverb=reverb, + reverb_room_size=sliders[0], + reverb_damping=sliders[1], + reverb_wet_level=sliders[2], + reverb_dry_level=sliders[3], + reverb_width=sliders[4], + reverb_freeze_mode=sliders[5], + pitch_shift=pitch_shift, + pitch_shift_semitones=sliders[6], + limiter=limiter, + limiter_threshold=sliders[7], + limiter_release=sliders[8], + gain=gain, + gain_db=sliders[9], + distortion=distortion, + distortion_gain=sliders[10], + chorus=chorus, + chorus_rate=sliders[11], + chorus_depth=sliders[12], + 
chorus_delay=sliders[13], + chorus_feedback=sliders[14], + chorus_mix=sliders[15], + bitcrush=bitcrush, + bitcrush_bit_depth=sliders[16], + clipping=clipping, + clipping_threshold=sliders[17], + compressor=compressor, + compressor_threshold=sliders[18], + compressor_ratio=sliders[19], + compressor_attack=sliders[20], + compressor_release=sliders[21], + delay=delay, + delay_seconds=sliders[22], + delay_feedback=sliders[23], + delay_mix=sliders[24], + audio_output_path=audio_output_paths, + ) + sf.write( + audio_output_paths, audio_opt, self.tgt_sr, format="WAV" + ) + else: + audio_opt = self.vc.pipeline( + model=self.hubert_model, + net_g=self.net_g, + sid=sid, + audio=audio, + input_audio_path=audio_input_path, + pitch=pitch, + f0_method=f0_method, + file_index=file_index, + index_rate=index_rate, + pitch_guidance=self.use_f0, + filter_radius=filter_radius, + tgt_sr=self.tgt_sr, + resample_sr=resample_sr, + volume_envelope=volume_envelope, + version=self.version, + protect=protect, + hop_length=hop_length, + f0_autotune=f0_autotune, + f0_file=f0_file, + ) + + if audio_output_paths: + sf.write(audio_output_paths, audio_opt, self.tgt_sr, format="WAV") + + if clean_audio: + cleaned_audio = self.remove_audio_noise( + audio_output_paths, clean_strength + ) + if cleaned_audio is not None: + sf.write( + audio_output_paths, cleaned_audio, self.tgt_sr, format="WAV" + ) + if post_process: + audio_output_paths = self.post_process_audio( + audio_input=audio_output_paths, + sample_rate=self.tgt_sr, + reverb=reverb, + reverb_room_size=sliders["reverb_room_size"], + reverb_damping=sliders["reverb_damping"], + reverb_wet_level=sliders["reverb_wet_level"], + reverb_dry_level=sliders["reverb_dry_level"], + reverb_width=sliders["reverb_width"], + reverb_freeze_mode=sliders["reverb_freeze_mode"], + pitch_shift=pitch_shift, + pitch_shift_semitones=sliders["pitch_shift_semitones"], + limiter=limiter, + limiter_threshold=sliders["limiter_threshold"], + 
limiter_release=sliders["limiter_release"], + gain=gain, + gain_db=sliders["gain_db"], + distortion=distortion, + distortion_gain=sliders["distortion_gain"], + chorus=chorus, + chorus_rate=sliders["chorus_rate"], + chorus_depth=sliders["chorus_depth"], + chorus_delay=sliders["chorus_delay"], + chorus_feedback=sliders["chorus_feedback"], + chorus_mix=sliders["chorus_mix"], + bitcrush=bitcrush, + bitcrush_bit_depth=sliders["bitcrush_bit_depth"], + clipping=clipping, + clipping_threshold=sliders["clipping_threshold"], + compressor=compressor, + compressor_threshold=sliders["compressor_threshold"], + compressor_ratio=sliders["compressor_ratio"], + compressor_attack=sliders["compressor_attack"], + compressor_release=sliders["compressor_release"], + delay=delay, + delay_seconds=sliders["delay_seconds"], + delay_feedback=sliders["delay_feedback"], + delay_mix=sliders["delay_mix"], + audio_output_path=audio_output_paths, + ) + output_path_format = audio_output_paths.replace( + ".wav", f".{export_format.lower()}" + ) + audio_output_paths = self.convert_audio_format( + audio_output_paths, output_path_format, export_format + ) + print(f"Conversion completed at '{audio_output_paths}'.") + elapsed_time = time.time() - start_time + print(f"Batch conversion completed in {elapsed_time:.2f} seconds.") + os.remove(pid_file_path) + except Exception as error: + print(f"An error occurred during audio conversion: {error}") + print(traceback.format_exc()) + def get_vc(self, weight_root, sid): """ Loads the voice conversion model and sets up the pipeline. 
diff --git a/rvc/infer/pipeline.py b/rvc/infer/pipeline.py index cb4f6bd4a4224366a90fd64bf768776d8e3b1f2a..e3eae01ae1ee2b341ad072333f1434e46814f212 100644 --- a/rvc/infer/pipeline.py +++ b/rvc/infer/pipeline.py @@ -425,14 +425,11 @@ class Pipeline: feats = feats.view(1, -1) padding_mask = torch.BoolTensor(feats.shape).to(self.device).fill_(False) - inputs = { - "source": feats.to(self.device), - "padding_mask": padding_mask, - "output_layer": 9 if version == "v1" else 12, - } with torch.no_grad(): - logits = model.extract_features(**inputs) - feats = model.final_proj(logits[0]) if version == "v1" else logits[0] + feats = model(feats.to(self.device))["last_hidden_state"] + feats = ( + model.final_proj(feats[0]).unsqueeze(0) if version == "v1" else feats + ) if protect < 0.5 and pitch != None and pitchf != None: feats0 = feats.clone() if ( diff --git a/rvc/lib/algorithm/commons.py b/rvc/lib/algorithm/commons.py index 3c98abb8bf59456b016ebc9147ed91a414862c92..c76328c9188d48b296a297c0599a4d825dc9150f 100644 --- a/rvc/lib/algorithm/commons.py +++ b/rvc/lib/algorithm/commons.py @@ -57,37 +57,31 @@ def kl_divergence(m_p, logs_p, m_q, logs_q): return kl -def slice_segments(x, ids_str, segment_size=4): +def slice_segments( + x: torch.Tensor, ids_str: torch.Tensor, segment_size: int = 4, dim: int = 2 +): """ - Slice segments from a tensor. + Slice segments from a tensor, handling tensors with different numbers of dimensions. Args: - x: The tensor to slice. - ids_str: The starting indices of the segments. - segment_size: The size of each segment. - """ - ret = torch.zeros_like(x[:, :, :segment_size]) - for i in range(x.size(0)): - idx_str = ids_str[i] - idx_end = idx_str + segment_size - ret[i] = x[i, :, idx_str:idx_end] - return ret - - -def slice_segments2(x, ids_str, segment_size=4): + x (torch.Tensor): The tensor to slice. + ids_str (torch.Tensor): The starting indices of the segments. + segment_size (int, optional): The size of each segment. Defaults to 4. 
+ dim (int, optional): The dimension to slice across (2D or 3D tensors). Defaults to 2. """ - Slice segments from a tensor. + if dim == 2: + ret = torch.zeros_like(x[:, :segment_size]) + elif dim == 3: + ret = torch.zeros_like(x[:, :, :segment_size]) - Args: - x: The tensor to slice. - ids_str: The starting indices of the segments. - segment_size: The size of each segment. - """ - ret = torch.zeros_like(x[:, :segment_size]) for i in range(x.size(0)): - idx_str = ids_str[i] + idx_str = ids_str[i].item() idx_end = idx_str + segment_size - ret[i] = x[i, idx_str:idx_end] + if dim == 2: + ret[i] = x[i, idx_str:idx_end] + else: + ret[i] = x[i, :, idx_str:idx_end] + return ret @@ -105,7 +99,7 @@ def rand_slice_segments(x, x_lengths=None, segment_size=4): x_lengths = t ids_str_max = x_lengths - segment_size + 1 ids_str = (torch.rand([b]).to(device=x.device) * ids_str_max).to(dtype=torch.long) - ret = slice_segments(x, ids_str, segment_size) + ret = slice_segments(x, ids_str, segment_size, dim=3) return ret, ids_str @@ -163,6 +157,24 @@ def fused_add_tanh_sigmoid_multiply(input_a, input_b, n_channels): return acts +# Zluda, same as previous, but without jit.script +def fused_add_tanh_sigmoid_multiply_no_jit(input_a, input_b, n_channels): + """ + Fused add tanh sigmoid multiply operation. + + Args: + input_a: The first input tensor. + input_b: The second input tensor. + n_channels: The number of channels. + """ + n_channels_int = n_channels[0] + in_act = input_a + input_b + t_act = torch.tanh(in_act[:, :n_channels_int, :]) + s_act = torch.sigmoid(in_act[:, n_channels_int:, :]) + acts = t_act * s_act + return acts + + def convert_pad_shape(pad_shape: List[List[int]]) -> List[int]: """ Convert the pad shape to a list of integers. 
diff --git a/rvc/lib/algorithm/discriminators.py b/rvc/lib/algorithm/discriminators.py index 2a2b3e89347c5047e5be6b480fcb3e673a0b3caf..23f8c689464d9fcac3f07f7a04dc241627d926d7 100644 --- a/rvc/lib/algorithm/discriminators.py +++ b/rvc/lib/algorithm/discriminators.py @@ -112,6 +112,7 @@ class DiscriminatorS(torch.nn.Module): ] ) self.conv_post = norm_f(torch.nn.Conv1d(1024, 1, 3, 1, padding=1)) + self.lrelu = torch.nn.LeakyReLU(LRELU_SLOPE) def forward(self, x): """ @@ -122,7 +123,7 @@ class DiscriminatorS(torch.nn.Module): """ fmap = [] for conv in self.convs: - x = torch.nn.functional.leaky_relu(conv(x), LRELU_SLOPE) + x = self.lrelu(conv(x)) fmap.append(x) x = self.conv_post(x) fmap.append(x) @@ -172,6 +173,7 @@ class DiscriminatorP(torch.nn.Module): ) self.conv_post = norm_f(torch.nn.Conv2d(1024, 1, (3, 1), 1, padding=(1, 0))) + self.lrelu = torch.nn.LeakyReLU(LRELU_SLOPE) def forward(self, x): """ @@ -188,7 +190,7 @@ class DiscriminatorP(torch.nn.Module): x = x.view(b, c, -1, self.period) for conv in self.convs: - x = torch.nn.functional.leaky_relu(conv(x), LRELU_SLOPE) + x = self.lrelu(conv(x)) fmap.append(x) x = self.conv_post(x) diff --git a/rvc/lib/algorithm/generators.py b/rvc/lib/algorithm/generators.py index 98dc91bb73ed1ce078bc297509f8d70b69a6235d..75fc2ad106de7a7333f2d610e61b4441cbd98bbb 100644 --- a/rvc/lib/algorithm/generators.py +++ b/rvc/lib/algorithm/generators.py @@ -8,7 +8,7 @@ from rvc.lib.algorithm.commons import init_weights class Generator(torch.nn.Module): - """Generator for synthesizing audio. + """Generator for synthesizing audio. Optimized for performance and quality. Args: initial_channel (int): Number of channels in the initial convolutional layer. 
@@ -40,9 +40,9 @@ class Generator(torch.nn.Module): ) resblock = ResBlock1 if resblock == "1" else ResBlock2 - self.ups = torch.nn.ModuleList() + self.ups_and_resblocks = torch.nn.ModuleList() for i, (u, k) in enumerate(zip(upsample_rates, upsample_kernel_sizes)): - self.ups.append( + self.ups_and_resblocks.append( weight_norm( torch.nn.ConvTranspose1d( upsample_initial_channel // (2**i), @@ -53,53 +53,43 @@ class Generator(torch.nn.Module): ) ) ) - - self.resblocks = torch.nn.ModuleList() - for i in range(len(self.ups)): ch = upsample_initial_channel // (2 ** (i + 1)) for j, (k, d) in enumerate( zip(resblock_kernel_sizes, resblock_dilation_sizes) ): - self.resblocks.append(resblock(ch, k, d)) + self.ups_and_resblocks.append(resblock(ch, k, d)) self.conv_post = torch.nn.Conv1d(ch, 1, 7, 1, padding=3, bias=False) - self.ups.apply(init_weights) + self.ups_and_resblocks.apply(init_weights) if gin_channels != 0: self.cond = torch.nn.Conv1d(gin_channels, upsample_initial_channel, 1) - def forward(self, x: torch.Tensor, g: Optional[torch.Tensor] = None): - x = self.conv_pre(x) - if g is not None: - x = x + self.cond(g) - - for i in range(self.num_upsamples): - x = torch.nn.functional.leaky_relu(x, LRELU_SLOPE) - x = self.ups[i](x) - xs = None - for j in range(self.num_kernels): - if xs is None: - xs = self.resblocks[i * self.num_kernels + j](x) - else: - xs += self.resblocks[i * self.num_kernels + j](x) - x = xs / self.num_kernels - x = torch.nn.functional.leaky_relu(x) - x = self.conv_post(x) - x = torch.tanh(x) - - return x + def forward(self, x: torch.Tensor, g: Optional[torch.Tensor] = None): + x = self.conv_pre(x) + if g is not None: + x = x + self.cond(g) + + resblock_idx = 0 + for _ in range(self.num_upsamples): + x = torch.nn.functional.leaky_relu(x, LRELU_SLOPE) + x = self.ups_and_resblocks[resblock_idx](x) + resblock_idx += 1 + xs = 0 + for _ in range(self.num_kernels): + xs += self.ups_and_resblocks[resblock_idx](x) + resblock_idx += 1 + x = xs / 
self.num_kernels + + x = torch.nn.functional.leaky_relu(x) + x = self.conv_post(x) + x = torch.tanh(x) + + return x def __prepare_scriptable__(self): """Prepares the module for scripting.""" - for l in self.ups: - for hook in l._forward_pre_hooks.values(): - if ( - hook.__module__ == "torch.nn.utils.parametrizations.weight_norm" - and hook.__class__.__name__ == "WeightNorm" - ): - torch.nn.utils.remove_weight_norm(l) - - for l in self.resblocks: + for l in self.ups_and_resblocks: for hook in l._forward_pre_hooks.values(): if ( hook.__module__ == "torch.nn.utils.parametrizations.weight_norm" @@ -110,10 +100,8 @@ class Generator(torch.nn.Module): def remove_weight_norm(self): """Removes weight normalization from the upsampling and residual blocks.""" - for l in self.ups: + for l in self.ups_and_resblocks: remove_weight_norm(l) - for l in self.resblocks: - l.remove_weight_norm() class SineGen(torch.nn.Module): @@ -151,7 +139,6 @@ class SineGen(torch.nn.Module): Args: f0 (torch.Tensor): F0 tensor with shape (batch_size, length, 1).. 
""" - # generate uv signal uv = torch.ones_like(f0) uv = uv * (f0 > self.voiced_threshold) return uv @@ -166,12 +153,13 @@ class SineGen(torch.nn.Module): with torch.no_grad(): f0 = f0[:, None].transpose(1, 2) f0_buf = torch.zeros(f0.shape[0], f0.shape[1], self.dim, device=f0.device) - # fundamental component f0_buf[:, :, 0] = f0[:, :, 0] - for idx in range(self.harmonic_num): - f0_buf[:, :, idx + 1] = f0_buf[:, :, 0] * ( - idx + 2 - ) # idx + 2: the (idx+1)-th overtone, (idx+2)-th harmonic + f0_buf[:, :, 1:] = ( + f0_buf[:, :, 0:1] + * torch.arange(2, self.harmonic_num + 2, device=f0.device)[ + None, None, : + ] + ) rad_values = (f0_buf / float(self.sample_rate)) % 1 rand_ini = torch.rand( f0_buf.shape[0], f0_buf.shape[2], device=f0_buf.device diff --git a/rvc/lib/algorithm/modules.py b/rvc/lib/algorithm/modules.py index 11e669b8c0fcf31a68d46b932c4e47eabc549608..7a9bc5414c5f309e957a0f45837c1bdb06051983 100644 --- a/rvc/lib/algorithm/modules.py +++ b/rvc/lib/algorithm/modules.py @@ -1,5 +1,8 @@ import torch -from rvc.lib.algorithm.commons import fused_add_tanh_sigmoid_multiply +from rvc.lib.algorithm.commons import ( + fused_add_tanh_sigmoid_multiply_no_jit, + fused_add_tanh_sigmoid_multiply, +) class WaveNet(torch.nn.Module): @@ -44,26 +47,25 @@ class WaveNet(torch.nn.Module): cond_layer, name="weight" ) + dilations = [dilation_rate**i for i in range(n_layers)] + paddings = [(kernel_size * d - d) // 2 for d in dilations] + for i in range(n_layers): - dilation = dilation_rate**i - padding = int((kernel_size * dilation - dilation) / 2) in_layer = torch.nn.Conv1d( hidden_channels, 2 * hidden_channels, kernel_size, - dilation=dilation, - padding=padding, + dilation=dilations[i], + padding=paddings[i], ) in_layer = torch.nn.utils.parametrizations.weight_norm( in_layer, name="weight" ) self.in_layers.append(in_layer) - # last one is not necessary - if i < n_layers - 1: - res_skip_channels = 2 * hidden_channels - else: - res_skip_channels = hidden_channels + 
res_skip_channels = ( + hidden_channels if i == n_layers - 1 else 2 * hidden_channels + ) res_skip_layer = torch.nn.Conv1d(hidden_channels, res_skip_channels, 1) res_skip_layer = torch.nn.utils.parametrizations.weight_norm( @@ -79,7 +81,6 @@ class WaveNet(torch.nn.Module): x_mask (torch.Tensor): Mask tensor of shape (batch_size, 1, time_steps). g (torch.Tensor, optional): Conditioning tensor of shape (batch_size, gin_channels, time_steps). Defaults to None. - """ output = torch.zeros_like(x) n_channels_tensor = torch.IntTensor([self.hidden_channels]) @@ -87,6 +88,11 @@ class WaveNet(torch.nn.Module): if g is not None: g = self.cond_layer(g) + # Zluda + is_zluda = x.device.type == "cuda" and torch.cuda.get_device_name().endswith( + "[ZLUDA]" + ) + for i in range(self.n_layers): x_in = self.in_layers[i](x) if g is not None: @@ -95,7 +101,14 @@ class WaveNet(torch.nn.Module): else: g_l = torch.zeros_like(x_in) - acts = fused_add_tanh_sigmoid_multiply(x_in, g_l, n_channels_tensor) + # Preventing HIP crash by not using jit-decorated function + if is_zluda: + acts = fused_add_tanh_sigmoid_multiply_no_jit( + x_in, g_l, n_channels_tensor + ) + else: + acts = fused_add_tanh_sigmoid_multiply(x_in, g_l, n_channels_tensor) + acts = self.drop(acts) res_skip_acts = self.res_skip_layers[i](acts) diff --git a/rvc/lib/algorithm/nsf.py b/rvc/lib/algorithm/nsf.py index a7eaac7a0a277ba0103235af6cbe61a6ff3b6695..465e04de55d4881008552df091fcf1800f4ccd94 100644 --- a/rvc/lib/algorithm/nsf.py +++ b/rvc/lib/algorithm/nsf.py @@ -43,8 +43,8 @@ class SourceModuleHnNSF(torch.nn.Module): self.l_linear = torch.nn.Linear(harmonic_num + 1, 1) self.l_tanh = torch.nn.Tanh() - def forward(self, x: torch.Tensor, upp: int = 1): - sine_wavs, uv, _ = self.l_sin_gen(x, upp) + def forward(self, x: torch.Tensor, upsample_factor: int = 1): + sine_wavs, uv, _ = self.l_sin_gen(x, upsample_factor) sine_wavs = sine_wavs.to(dtype=self.l_linear.weight.dtype) sine_merge = self.l_tanh(self.l_linear(sine_wavs)) 
return sine_merge, None, None @@ -97,13 +97,21 @@ class GeneratorNSF(torch.nn.Module): self.ups = torch.nn.ModuleList() self.noise_convs = torch.nn.ModuleList() + channels = [ + upsample_initial_channel // (2 ** (i + 1)) + for i in range(len(upsample_rates)) + ] + stride_f0s = [ + math.prod(upsample_rates[i + 1 :]) if i + 1 < len(upsample_rates) else 1 + for i in range(len(upsample_rates)) + ] + for i, (u, k) in enumerate(zip(upsample_rates, upsample_kernel_sizes)): - current_channel = upsample_initial_channel // (2 ** (i + 1)) self.ups.append( weight_norm( torch.nn.ConvTranspose1d( upsample_initial_channel // (2**i), - current_channel, + channels[i], k, u, padding=(k - u) // 2, @@ -111,30 +119,25 @@ class GeneratorNSF(torch.nn.Module): ) ) - stride_f0 = ( - math.prod(upsample_rates[i + 1 :]) if i + 1 < len(upsample_rates) else 1 - ) self.noise_convs.append( torch.nn.Conv1d( 1, - current_channel, - kernel_size=stride_f0 * 2 if stride_f0 > 1 else 1, - stride=stride_f0, - padding=(stride_f0 // 2 if stride_f0 > 1 else 0), + channels[i], + kernel_size=(stride_f0s[i] * 2 if stride_f0s[i] > 1 else 1), + stride=stride_f0s[i], + padding=(stride_f0s[i] // 2 if stride_f0s[i] > 1 else 0), ) ) self.resblocks = torch.nn.ModuleList( [ - resblock_cls(upsample_initial_channel // (2 ** (i + 1)), k, d) + resblock_cls(channels[i], k, d) for i in range(len(self.ups)) for k, d in zip(resblock_kernel_sizes, resblock_dilation_sizes) ] ) - self.conv_post = torch.nn.Conv1d( - current_channel, 1, 7, 1, padding=3, bias=False - ) + self.conv_post = torch.nn.Conv1d(channels[-1], 1, 7, 1, padding=3, bias=False) self.ups.apply(init_weights) if gin_channels != 0: diff --git a/rvc/lib/algorithm/residuals.py b/rvc/lib/algorithm/residuals.py index d75829ef4a3e314e775bd8d84c32fbd97e7ddf83..aad4f0bd6026eaf951c71a22815c1ee9d6d3e2b3 100644 --- a/rvc/lib/algorithm/residuals.py +++ b/rvc/lib/algorithm/residuals.py @@ -202,7 +202,7 @@ class ResidualCouplingBlock(torch.nn.Module): for flow in self.flows: x, 
_ = flow(x, x_mask, g=g, reverse=reverse) else: - for flow in self.flows[::-1]: + for flow in reversed(self.flows): x = flow.forward(x, x_mask, g=g, reverse=reverse) return x diff --git a/rvc/lib/algorithm/synthesizers.py b/rvc/lib/algorithm/synthesizers.py index 7202b3336864224b9034930050ad5c2a187b5c10..2a1aa2366b49d5124c65e4234b6948ed2ca70676 100644 --- a/rvc/lib/algorithm/synthesizers.py +++ b/rvc/lib/algorithm/synthesizers.py @@ -3,7 +3,7 @@ from typing import Optional from rvc.lib.algorithm.nsf import GeneratorNSF from rvc.lib.algorithm.generators import Generator -from rvc.lib.algorithm.commons import slice_segments2, rand_slice_segments +from rvc.lib.algorithm.commons import slice_segments, rand_slice_segments from rvc.lib.algorithm.residuals import ResidualCouplingBlock from rvc.lib.algorithm.encoders import TextEncoder, PosteriorEncoder @@ -189,7 +189,7 @@ class Synthesizer(torch.nn.Module): z_p = self.flow(z, y_mask, g=g) z_slice, ids_slice = rand_slice_segments(z, y_lengths, self.segment_size) if self.use_f0: - pitchf = slice_segments2(pitchf, ids_slice, self.segment_size) + pitchf = slice_segments(pitchf, ids_slice, self.segment_size, 2) o = self.dec(z_slice, pitchf, g=g) else: o = self.dec(z_slice, g=g) diff --git a/rvc/lib/predictors/F0Extractor.py b/rvc/lib/predictors/F0Extractor.py index bc3b61f3da741d428add423019286f081d73775c..d5177c4874a5aeeacad76c6f1e8c87fb448fbd40 100644 --- a/rvc/lib/predictors/F0Extractor.py +++ b/rvc/lib/predictors/F0Extractor.py @@ -40,8 +40,16 @@ class F0Extractor: def extract_f0(self) -> np.ndarray: f0 = None method = self.method + # Fall back to CPU for ZLUDA as these methods use CUcFFT + device = ( + "cpu" + if "cuda" in config.device + and torch.cuda.get_device_name().endswith("[ZLUDA]") + else config.device + ) + if method == "crepe": - wav16k_torch = torch.FloatTensor(self.wav16k).unsqueeze(0).to(config.device) + wav16k_torch = torch.FloatTensor(self.wav16k).unsqueeze(0).to(device) f0 = torchcrepe.predict( 
wav16k_torch, sample_rate=16000, @@ -49,7 +57,7 @@ class F0Extractor: batch_size=512, fmin=self.f0_min, fmax=self.f0_max, - device=config.device, + device=device, ) f0 = f0[0].cpu().numpy() elif method == "fcpe": @@ -57,13 +65,9 @@ class F0Extractor: audio_length = len(audio) f0_target_length = (audio_length // self.hop_length) + 1 audio = ( - torch.from_numpy(audio) - .float() - .unsqueeze(0) - .unsqueeze(-1) - .to(config.device) + torch.from_numpy(audio).float().unsqueeze(0).unsqueeze(-1).to(device) ) - model = torchfcpe.spawn_bundled_infer_model(device=config.device) + model = torchfcpe.spawn_bundled_infer_model(device=device) f0 = model.infer( audio, @@ -77,10 +81,11 @@ class F0Extractor: ) f0 = f0.squeeze().cpu().numpy() elif method == "rmvpe": + is_half = False if device == "cpu" else config.is_half model_rmvpe = RMVPE0Predictor( os.path.join("rvc", "models", "predictors", "rmvpe.pt"), - is_half=config.is_half, - device=config.device, + is_half=is_half, + device=device, # hop_length=80 ) f0 = model_rmvpe.infer_from_audio(self.wav16k, thred=0.03) diff --git a/rvc/lib/predictors/F0Predictor.py b/rvc/lib/predictors/F0Predictor.py deleted file mode 100644 index 384f43f8a15977edb66d5cf8b074a8109abf10a1..0000000000000000000000000000000000000000 --- a/rvc/lib/predictors/F0Predictor.py +++ /dev/null @@ -1,6 +0,0 @@ -class F0Predictor(object): - def compute_f0(self, wav, p_len): - pass - - def compute_f0_uv(self, wav, p_len): - pass diff --git a/rvc/lib/predictors/FCPE.py b/rvc/lib/predictors/FCPE.py index a649ca015471c5b3ba39a758c40894f4265f3786..12f6c346aa5d448a2133400a09e103043b5863c8 100644 --- a/rvc/lib/predictors/FCPE.py +++ b/rvc/lib/predictors/FCPE.py @@ -22,49 +22,42 @@ os.environ["LRU_CACHE_CAPACITY"] = "3" def load_wav_to_torch(full_path, target_sr=None, return_empty_on_exception=False): - sampling_rate = None + """Loads wav file to torch tensor.""" try: - data, sampling_rate = sf.read(full_path, always_2d=True) # than soundfile. 
+ data, sample_rate = sf.read(full_path, always_2d=True) except Exception as error: print(f"An error occurred loading {full_path}: {error}") if return_empty_on_exception: - return [], sampling_rate or target_sr or 48000 + return [], sample_rate or target_sr or 48000 else: - raise Exception(error) - - if len(data.shape) > 1: - data = data[:, 0] - assert ( - len(data) > 2 - ) # check duration of audio file is > 2 samples (because otherwise the slice operation was on the wrong dimension) - - if np.issubdtype(data.dtype, np.integer): # if audio data is type int - max_mag = -np.iinfo( - data.dtype - ).min # maximum magnitude = min possible value of intXX - else: # if audio data is type fp32 - max_mag = max(np.amax(data), -np.amin(data)) - max_mag = ( - (2**31) + 1 - if max_mag > (2**15) - else ((2**15) + 1 if max_mag > 1.01 else 1.0) - ) # data should be either 16-bit INT, 32-bit INT or [-1 to 1] float32 - + raise + + data = data[:, 0] if len(data.shape) > 1 else data + assert len(data) > 2 + + # Normalize data + max_mag = ( + -np.iinfo(data.dtype).min + if np.issubdtype(data.dtype, np.integer) + else max(np.amax(data), -np.amin(data)) + ) + max_mag = ( + (2**31) + 1 if max_mag > (2**15) else ((2**15) + 1 if max_mag > 1.01 else 1.0) + ) data = torch.FloatTensor(data.astype(np.float32)) / max_mag - if ( - torch.isinf(data) | torch.isnan(data) - ).any() and return_empty_on_exception: # resample will crash with inf/NaN inputs. 
return_empty_on_exception will return empty arr instead of except - return [], sampling_rate or target_sr or 48000 - if target_sr is not None and sampling_rate != target_sr: + # Handle exceptions and resample + if (torch.isinf(data) | torch.isnan(data)).any() and return_empty_on_exception: + return [], sample_rate or target_sr or 48000 + if target_sr is not None and sample_rate != target_sr: data = torch.from_numpy( librosa.core.resample( - data.numpy(), orig_sr=sampling_rate, target_sr=target_sr + data.numpy(), orig_sr=sample_rate, target_sr=target_sr ) ) - sampling_rate = target_sr + sample_rate = target_sr - return data, sampling_rate + return data, sample_rate def dynamic_range_compression(x, C=1, clip_val=1e-5): @@ -96,7 +89,6 @@ class STFT: clip_val=1e-5, ): self.target_sr = sr - self.n_mels = n_mels self.n_fft = n_fft self.win_size = win_size @@ -108,7 +100,7 @@ class STFT: self.hann_window = {} def get_mel(self, y, keyshift=0, speed=1, center=False, train=False): - sampling_rate = self.target_sr + sample_rate = self.target_sr n_mels = self.n_mels n_fft = self.n_fft win_size = self.win_size @@ -121,17 +113,15 @@ class STFT: n_fft_new = int(np.round(n_fft * factor)) win_size_new = int(np.round(win_size * factor)) hop_length_new = int(np.round(hop_length * speed)) - if not train: - mel_basis = self.mel_basis - hann_window = self.hann_window - else: - mel_basis = {} - hann_window = {} + + # Optimize mel_basis and hann_window caching + mel_basis = self.mel_basis if not train else {} + hann_window = self.hann_window if not train else {} mel_basis_key = str(fmax) + "_" + str(y.device) if mel_basis_key not in mel_basis: mel = librosa_mel_fn( - sr=sampling_rate, n_fft=n_fft, n_mels=n_mels, fmin=fmin, fmax=fmax + sr=sample_rate, n_fft=n_fft, n_mels=n_mels, fmin=fmin, fmax=fmax ) mel_basis[mel_basis_key] = torch.from_numpy(mel).float().to(y.device) @@ -139,15 +129,13 @@ class STFT: if keyshift_key not in hann_window: hann_window[keyshift_key] = 
torch.hann_window(win_size_new).to(y.device) + # Padding and STFT pad_left = (win_size_new - hop_length_new) // 2 pad_right = max( (win_size_new - hop_length_new + 1) // 2, win_size_new - y.size(-1) - pad_left, ) - if pad_right < y.size(-1): - mode = "reflect" - else: - mode = "constant" + mode = "reflect" if pad_right < y.size(-1) else "constant" y = torch.nn.functional.pad(y.unsqueeze(1), (pad_left, pad_right), mode=mode) y = y.squeeze(1) @@ -164,12 +152,17 @@ class STFT: return_complex=True, ) spec = torch.sqrt(spec.real.pow(2) + spec.imag.pow(2) + (1e-9)) + + # Handle keyshift and mel conversion if keyshift != 0: size = n_fft // 2 + 1 resize = spec.size(1) - if resize < size: - spec = F.pad(spec, (0, 0, 0, size - resize)) - spec = spec[:, :size, :] * win_size / win_size_new + spec = ( + F.pad(spec, (0, 0, 0, size - resize)) + if resize < size + else spec[:, :size, :] + ) + spec = spec * win_size / win_size_new spec = torch.matmul(mel_basis[mel_basis_key], spec) spec = dynamic_range_compression_torch(spec, clip_val=clip_val) return spec @@ -182,34 +175,28 @@ class STFT: stft = STFT() -# import fast_transformers.causal_product.causal_product_cuda - def softmax_kernel( data, *, projection_matrix, is_query, normalize_data=True, eps=1e-4, device=None ): b, h, *_ = data.shape - # (batch size, head, length, model_dim) - # normalize model dim + # Normalize data data_normalizer = (data.shape[-1] ** -0.25) if normalize_data else 1.0 - # what is ration?, projection_matrix.shape[0] --> 266 - + # Project data ratio = projection_matrix.shape[0] ** -0.5 - projection = repeat(projection_matrix, "j d -> b h j d", b=b, h=h) projection = projection.type_as(data) - - # data_dash = w^T x data_dash = torch.einsum("...id,...jd->...ij", (data_normalizer * data), projection) - # diag_data = D**2 + # Calculate diagonal data diag_data = data**2 diag_data = torch.sum(diag_data, dim=-1) diag_data = (diag_data / 2.0) * (data_normalizer**2) diag_data = diag_data.unsqueeze(dim=-1) + # Apply 
softmax if is_query: data_dash = ratio * ( torch.exp( @@ -220,9 +207,7 @@ def softmax_kernel( + eps ) else: - data_dash = ratio * ( - torch.exp(data_dash - diag_data + eps) - ) # - torch.max(data_dash)) + eps) + data_dash = ratio * (torch.exp(data_dash - diag_data + eps)) return data_dash.type_as(data) @@ -232,8 +217,6 @@ def orthogonal_matrix_chunk(cols, qr_uniform_q=False, device=None): q, r = torch.linalg.qr(unstructured_block.cpu(), mode="reduced") q, r = map(lambda t: t.to(device), (q, r)) - # proposed by @Parskatt - # to make sure Q is uniform https://arxiv.org/pdf/math-ph/0609050.pdf if qr_uniform_q: d = torch.diag(r, 0) q *= d.sign() @@ -257,8 +240,6 @@ def cast_tuple(val): class PCmer(nn.Module): - """The encoder that is used in the Transformer model.""" - def __init__( self, num_layers, @@ -280,56 +261,25 @@ class PCmer(nn.Module): self._layers = nn.ModuleList([_EncoderLayer(self) for _ in range(num_layers)]) - # METHODS ######################################################################################################## - def forward(self, phone, mask=None): - - # apply all layers to the input - for i, layer in enumerate(self._layers): + for layer in self._layers: phone = layer(phone, mask) - # provide the final sequence return phone -# ==================================================================================================================== # -# CLASS _ E N C O D E R L A Y E R # -# ==================================================================================================================== # - - class _EncoderLayer(nn.Module): - """One layer of the encoder. - - Attributes: - attn: (:class:`mha.MultiHeadAttention`): The attention mechanism that is used to read the input sequence. - feed_forward (:class:`ffl.FeedForwardLayer`): The feed-forward layer on top of the attention mechanism. - """ - def __init__(self, parent: PCmer): - """Creates a new instance of ``_EncoderLayer``. 
- - Args: - parent (Encoder): The encoder that the layers is created for. - """ super().__init__() - self.conformer = ConformerConvModule(parent.dim_model) self.norm = nn.LayerNorm(parent.dim_model) self.dropout = nn.Dropout(parent.residual_dropout) - - # selfatt -> fastatt: performer! self.attn = SelfAttention( dim=parent.dim_model, heads=parent.num_heads, causal=False ) - # METHODS ######################################################################################################## - def forward(self, phone, mask=None): - - # compute attention sub-layer phone = phone + (self.attn(self.norm(phone), mask=mask)) - phone = phone + (self.conformer(phone)) - return phone @@ -338,9 +288,6 @@ def calc_same_padding(kernel_size): return (pad, pad - (kernel_size + 1) % 2) -# helper classes - - class Swish(nn.Module): def forward(self, x): return x * x.sigmoid() @@ -394,7 +341,6 @@ class ConformerConvModule(nn.Module): DepthWiseConv1d( inner_dim, inner_dim, kernel_size=kernel_size, padding=padding ), - # nn.BatchNorm1d(inner_dim) if not causal else nn.Identity(), Swish(), nn.Conv1d(inner_dim, dim, 1), Transpose((1, 2)), @@ -409,12 +355,9 @@ def linear_attention(q, k, v): if v is None: out = torch.einsum("...ed,...nd->...ne", k, q) return out - else: k_cumsum = k.sum(dim=-2) - # k_cumsum = k.sum(dim = -2) D_inv = 1.0 / (torch.einsum("...nd,...d->...n", q, k_cumsum.type_as(q)) + 1e-8) - context = torch.einsum("...nd,...ne->...de", k, v) out = torch.einsum("...de,...nd,...n->...ne", context, q, D_inv) return out @@ -437,7 +380,6 @@ def gaussian_orthogonal_random_matrix( q = orthogonal_matrix_chunk( nb_columns, qr_uniform_q=qr_uniform_q, device=device ) - block_list.append(q[:remaining_rows]) final_matrix = torch.cat(block_list) @@ -485,11 +427,7 @@ class FastAttention(nn.Module): self.generalized_attention = generalized_attention self.kernel_fn = kernel_fn - - # if this is turned on, no projection will be used - # queries and keys will be softmax-ed as in the original 
efficient attention paper self.no_projection = no_projection - self.causal = causal @torch.no_grad() @@ -508,11 +446,11 @@ class FastAttention(nn.Module): create_kernel = partial( softmax_kernel, projection_matrix=self.projection_matrix, device=device ) - q = create_kernel(q, is_query=True) k = create_kernel(k, is_query=False) attn_fn = linear_attention if not self.causal else self.causal_linear_fn + if v is None: out = attn_fn(q, k, None) return out @@ -590,7 +528,6 @@ class SelfAttention(nn.Module): _, _, _, h, gh = *x.shape, self.heads, self.global_heads cross_attend = exists(context) - context = default(context, x) context_mask = default(context_mask, mask) if not cross_attend else context_mask q, k, v = self.to_q(x), self.to_k(context), self.to_v(context) @@ -604,7 +541,7 @@ class SelfAttention(nn.Module): global_mask = context_mask[:, None, :, None] v.masked_fill_(~global_mask, 0.0) if cross_attend: - pass + pass # TODO: Implement cross-attention else: out = self.fast_attention(q, k, v) attn_outs.append(out) @@ -712,31 +649,25 @@ class FCPE(nn.Module): def forward( self, mel, infer=True, gt_f0=None, return_hz_f0=False, cdecoder="local_argmax" ): - """ - input: - B x n_frames x n_unit - return: - dict of B x n_frames x feat - """ if cdecoder == "argmax": self.cdecoder = self.cents_decoder elif cdecoder == "local_argmax": self.cdecoder = self.cents_local_decoder - if self.use_input_conv: - x = self.stack(mel.transpose(1, 2)).transpose(1, 2) - else: - x = mel + + x = ( + self.stack(mel.transpose(1, 2)).transpose(1, 2) + if self.use_input_conv + else mel + ) x = self.decoder(x) x = self.norm(x) - x = self.dense_out(x) # [B,N,D] + x = self.dense_out(x) x = torch.sigmoid(x) + if not infer: - gt_cent_f0 = self.f0_to_cent(gt_f0) # mel f0 #[B,N,1] - gt_cent_f0 = self.gaussian_blurred_cent(gt_cent_f0) # #[B,N,out_dim] - loss_all = self.loss_mse_scale * F.binary_cross_entropy( - x, gt_cent_f0 - ) # bce loss - # l2 regularization + gt_cent_f0 = self.f0_to_cent(gt_f0) + 
gt_cent_f0 = self.gaussian_blurred_cent(gt_cent_f0) + loss_all = self.loss_mse_scale * F.binary_cross_entropy(x, gt_cent_f0) if self.loss_l2_regularization: loss_all = loss_all + l2_regularization( model=self, l2_alpha=self.loss_l2_regularization_scale @@ -745,8 +676,8 @@ class FCPE(nn.Module): if infer: x = self.cdecoder(x) x = self.cent_to_f0(x) - if not return_hz_f0: - x = (1 + x / 700).log() + x = (1 + x / 700).log() if not return_hz_f0 else x + return x def cents_decoder(self, y, mask=True): @@ -754,37 +685,30 @@ class FCPE(nn.Module): ci = self.cent_table[None, None, :].expand(B, N, -1) rtn = torch.sum(ci * y, dim=-1, keepdim=True) / torch.sum( y, dim=-1, keepdim=True - ) # cents: [B,N,1] + ) if mask: confident = torch.max(y, dim=-1, keepdim=True)[0] confident_mask = torch.ones_like(confident) confident_mask[confident <= self.threshold] = float("-INF") rtn = rtn * confident_mask - if self.confidence: - return rtn, confident - else: - return rtn + return (rtn, confident) if self.confidence else rtn def cents_local_decoder(self, y, mask=True): B, N, _ = y.size() ci = self.cent_table[None, None, :].expand(B, N, -1) confident, max_index = torch.max(y, dim=-1, keepdim=True) local_argmax_index = torch.arange(0, 9).to(max_index.device) + (max_index - 4) - local_argmax_index[local_argmax_index < 0] = 0 - local_argmax_index[local_argmax_index >= self.n_out] = self.n_out - 1 + local_argmax_index = torch.clamp(local_argmax_index, 0, self.n_out - 1) ci_l = torch.gather(ci, -1, local_argmax_index) y_l = torch.gather(y, -1, local_argmax_index) rtn = torch.sum(ci_l * y_l, dim=-1, keepdim=True) / torch.sum( y_l, dim=-1, keepdim=True - ) # cents: [B,N,1] + ) if mask: confident_mask = torch.ones_like(confident) confident_mask[confident <= self.threshold] = float("-INF") rtn = rtn * confident_mask - if self.confidence: - return rtn, confident - else: - return rtn + return (rtn, confident) if self.confidence else rtn def cent_to_f0(self, cent): return 10.0 * 2 ** (cent / 1200.0) 
@@ -792,7 +716,7 @@ class FCPE(nn.Module): def f0_to_cent(self, f0): return 1200.0 * torch.log2(f0 / 10.0) - def gaussian_blurred_cent(self, cents): # cents: [B,N,1] + def gaussian_blurred_cent(self, cents): mask = (cents > 0.1) & (cents < (1200.0 * np.log2(self.f0_max / 10.0))) B, N, _ = cents.size() ci = self.cent_table[None, None, :].expand(B, N, -1) @@ -839,10 +763,8 @@ class FCPEInfer: class Wav2Mel: - def __init__(self, args, device=None, dtype=torch.float32): - # self.args = args - self.sampling_rate = args.mel.sampling_rate + self.sample_rate = args.mel.sampling_rate self.hop_size = args.mel.hop_size if device is None: device = "cuda" if torch.cuda.is_available() else "cpu" @@ -860,36 +782,32 @@ class Wav2Mel: self.resample_kernel = {} def extract_nvstft(self, audio, keyshift=0, train=False): - mel = self.stft.get_mel(audio, keyshift=keyshift, train=train).transpose( - 1, 2 - ) # B, n_frames, bins + mel = self.stft.get_mel(audio, keyshift=keyshift, train=train).transpose(1, 2) return mel def extract_mel(self, audio, sample_rate, keyshift=0, train=False): audio = audio.to(self.dtype).to(self.device) - # resample - if sample_rate == self.sampling_rate: + if sample_rate == self.sample_rate: audio_res = audio else: key_str = str(sample_rate) if key_str not in self.resample_kernel: self.resample_kernel[key_str] = Resample( - sample_rate, self.sampling_rate, lowpass_filter_width=128 + sample_rate, self.sample_rate, lowpass_filter_width=128 ) self.resample_kernel[key_str] = ( self.resample_kernel[key_str].to(self.dtype).to(self.device) ) audio_res = self.resample_kernel[key_str](audio) - # extract mel = self.extract_nvstft( audio_res, keyshift=keyshift, train=train ) # B, n_frames, bins n_frames = int(audio.shape[1] // self.hop_size) + 1 - if n_frames > int(mel.shape[1]): - mel = torch.cat((mel, mel[:, -1:, :]), 1) - if n_frames < int(mel.shape[1]): - mel = mel[:, :n_frames, :] + mel = ( + torch.cat((mel, mel[:, -1:, :]), 1) if n_frames > int(mel.shape[1]) else 
mel + ) + mel = mel[:, :n_frames, :] if n_frames < int(mel.shape[1]) else mel return mel def __call__(self, audio, sample_rate, keyshift=0, train=False): @@ -907,19 +825,9 @@ class DotDict(dict): class F0Predictor(object): def compute_f0(self, wav, p_len): - """ - input: wav:[signal_length] - p_len:int - output: f0:[signal_length//hop_length] - """ pass def compute_f0_uv(self, wav, p_len): - """ - input: wav:[signal_length] - p_len:int - output: f0:[signal_length//hop_length],uv:[signal_length//hop_length] - """ pass @@ -932,19 +840,16 @@ class FCPEF0Predictor(F0Predictor): f0_max=1100, dtype=torch.float32, device=None, - sampling_rate=44100, + sample_rate=44100, threshold=0.05, ): self.fcpe = FCPEInfer(model_path, device=device, dtype=dtype) self.hop_length = hop_length self.f0_min = f0_min self.f0_max = f0_max - if device is None: - self.device = "cuda" if torch.cuda.is_available() else "cpu" - else: - self.device = device + self.device = device or ("cuda" if torch.cuda.is_available() else "cpu") self.threshold = threshold - self.sampling_rate = sampling_rate + self.sample_rate = sample_rate self.dtype = dtype self.name = "fcpe" @@ -955,82 +860,61 @@ class FCPEF0Predictor(F0Predictor): mode: str = "nearest", ): ndim = content.ndim - - if content.ndim == 1: - content = content[None, None] - elif content.ndim == 2: - content = content[None] - + content = ( + content[None, None] + if ndim == 1 + else content[None] if ndim == 2 else content + ) assert content.ndim == 3 - is_np = isinstance(content, np.ndarray) - if is_np: - content = torch.from_numpy(content) - + content = torch.from_numpy(content) if is_np else content results = torch.nn.functional.interpolate(content, size=target_len, mode=mode) - - if is_np: - results = results.numpy() - - if ndim == 1: - return results[0, 0] - elif ndim == 2: - return results[0] - - def post_process(self, x, sampling_rate, f0, pad_to): - if isinstance(f0, np.ndarray): - f0 = torch.from_numpy(f0).float().to(x.device) - - if pad_to 
is None: - return f0 - - f0 = self.repeat_expand(f0, pad_to) + results = results.numpy() if is_np else results + return results[0, 0] if ndim == 1 else results[0] if ndim == 2 else results + + def post_process(self, x, sample_rate, f0, pad_to): + f0 = ( + torch.from_numpy(f0).float().to(x.device) + if isinstance(f0, np.ndarray) + else f0 + ) + f0 = self.repeat_expand(f0, pad_to) if pad_to is not None else f0 vuv_vector = torch.zeros_like(f0) vuv_vector[f0 > 0.0] = 1.0 vuv_vector[f0 <= 0.0] = 0.0 - # 去掉0频率, 并线性插值 nzindex = torch.nonzero(f0).squeeze() f0 = torch.index_select(f0, dim=0, index=nzindex).cpu().numpy() - time_org = self.hop_length / sampling_rate * nzindex.cpu().numpy() - time_frame = np.arange(pad_to) * self.hop_length / sampling_rate + time_org = self.hop_length / sample_rate * nzindex.cpu().numpy() + time_frame = np.arange(pad_to) * self.hop_length / sample_rate vuv_vector = F.interpolate(vuv_vector[None, None, :], size=pad_to)[0][0] if f0.shape[0] <= 0: - return ( - torch.zeros(pad_to, dtype=torch.float, device=x.device).cpu().numpy(), - vuv_vector.cpu().numpy(), - ) + return np.zeros(pad_to), vuv_vector.cpu().numpy() if f0.shape[0] == 1: - return ( - torch.ones(pad_to, dtype=torch.float, device=x.device) * f0[0] - ).cpu().numpy(), vuv_vector.cpu().numpy() + return np.ones(pad_to) * f0[0], vuv_vector.cpu().numpy() - # 大概可以用 torch 重写? 
f0 = np.interp(time_frame, time_org, f0, left=f0[0], right=f0[-1]) - # vuv_vector = np.ceil(scipy.ndimage.zoom(vuv_vector,pad_to/len(vuv_vector),order = 0)) - return f0, vuv_vector.cpu().numpy() def compute_f0(self, wav, p_len=None): x = torch.FloatTensor(wav).to(self.dtype).to(self.device) - if p_len is None: - print("fcpe p_len is None") - p_len = x.shape[0] // self.hop_length - f0 = self.fcpe(x, sr=self.sampling_rate, threshold=self.threshold)[0, :, 0] + p_len = x.shape[0] // self.hop_length if p_len is None else p_len + f0 = self.fcpe(x, sr=self.sample_rate, threshold=self.threshold)[0, :, 0] if torch.all(f0 == 0): - rtn = f0.cpu().numpy() if p_len is None else np.zeros(p_len) - return rtn, rtn - return self.post_process(x, self.sampling_rate, f0, p_len)[0] + return f0.cpu().numpy() if p_len is None else np.zeros(p_len), ( + f0.cpu().numpy() if p_len is None else np.zeros(p_len) + ) + return self.post_process(x, self.sample_rate, f0, p_len)[0] def compute_f0_uv(self, wav, p_len=None): x = torch.FloatTensor(wav).to(self.dtype).to(self.device) - if p_len is None: - p_len = x.shape[0] // self.hop_length - f0 = self.fcpe(x, sr=self.sampling_rate, threshold=self.threshold)[0, :, 0] + p_len = x.shape[0] // self.hop_length if p_len is None else p_len + f0 = self.fcpe(x, sr=self.sample_rate, threshold=self.threshold)[0, :, 0] if torch.all(f0 == 0): - rtn = f0.cpu().numpy() if p_len is None else np.zeros(p_len) - return rtn, rtn - return self.post_process(x, self.sampling_rate, f0, p_len) + return f0.cpu().numpy() if p_len is None else np.zeros(p_len), ( + f0.cpu().numpy() if p_len is None else np.zeros(p_len) + ) + return self.post_process(x, self.sample_rate, f0, p_len) diff --git a/rvc/lib/predictors/RMVPE.py b/rvc/lib/predictors/RMVPE.py index a3705bb5489c75e1b7b46c8357db4b34f550a886..7e9f6ddd4bb061164c2910e8c4216ef51acd3503 100644 --- a/rvc/lib/predictors/RMVPE.py +++ b/rvc/lib/predictors/RMVPE.py @@ -356,7 +356,7 @@ class MelSpectrogram(torch.nn.Module): Args: 
is_half (bool): Whether to use half-precision floating-point numbers. n_mel_channels (int): Number of Mel-frequency bands. - sampling_rate (int): Sampling rate of the audio. + sample_rate (int): Sampling rate of the audio. win_length (int): Length of the window function in samples. hop_length (int): Hop size between frames in samples. n_fft (int, optional): Length of the FFT window. Defaults to None, which uses win_length. @@ -369,7 +369,7 @@ class MelSpectrogram(torch.nn.Module): self, is_half, n_mel_channels, - sampling_rate, + sample_rate, win_length, hop_length, n_fft=None, @@ -381,7 +381,7 @@ class MelSpectrogram(torch.nn.Module): n_fft = win_length if n_fft is None else n_fft self.hann_window = {} mel_basis = mel( - sr=sampling_rate, + sr=sample_rate, n_fft=n_fft, n_mels=n_mel_channels, fmin=mel_fmin, @@ -393,7 +393,7 @@ class MelSpectrogram(torch.nn.Module): self.n_fft = win_length if n_fft is None else n_fft self.hop_length = hop_length self.win_length = win_length - self.sampling_rate = sampling_rate + self.sample_rate = sample_rate self.n_mel_channels = n_mel_channels self.clamp = clamp self.is_half = is_half @@ -408,6 +408,15 @@ class MelSpectrogram(torch.nn.Module): self.hann_window[keyshift_key] = torch.hann_window(win_length_new).to( audio.device ) + + # Zluda, fall-back to CPU for FFTs since HIP SDK has no cuFFT alternative + source_device = audio.device + if audio.device.type == "cuda" and torch.cuda.get_device_name().endswith( + "[ZLUDA]" + ): + audio = audio.to("cpu") + self.hann_window[keyshift_key] = self.hann_window[keyshift_key].to("cpu") + fft = torch.stft( audio, n_fft=n_fft_new, @@ -416,7 +425,8 @@ class MelSpectrogram(torch.nn.Module): window=self.hann_window[keyshift_key], center=center, return_complex=True, - ) + ).to(source_device) + magnitude = torch.sqrt(fft.real.pow(2) + fft.imag.pow(2)) if keyshift != 0: size = self.n_fft // 2 + 1 diff --git a/rvc/lib/tools/gdown.py b/rvc/lib/tools/gdown.py index 
a9c526b6eaf8d4098c3d31a6d23a9cf447c8cb5a..eb5ca071e52d3d48c58708ee2fbbeefb205827d3 100644 --- a/rvc/lib/tools/gdown.py +++ b/rvc/lib/tools/gdown.py @@ -1,28 +1,23 @@ -from __future__ import print_function - -import json import os -import os.path as osp import re -import warnings -from six.moves import urllib_parse -import shutil +import six import sys +import json +import tqdm +import time +import shutil +import warnings import tempfile import textwrap -import time - import requests -import six -import tqdm +from six.moves import urllib_parse def indent(text, prefix): - def prefixed_lines(): - for line in text.splitlines(True): - yield (prefix + line if line.strip() else line) - - return "".join(prefixed_lines()) + """Indent each non-empty line of text with the given prefix.""" + return "".join( + (prefix + line if line.strip() else line) for line in text.splitlines(True) + ) class FileURLRetrievalError(Exception): @@ -36,24 +31,26 @@ class FolderContentsMaximumLimitError(Exception): def parse_url(url, warning=True): """Parse URLs especially for Google Drive links. - file_id: ID of file on Google Drive. - is_download_link: Flag if it is download link of Google Drive. + Args: + url: URL to parse. + warning: Whether to warn if the URL is not a download link. + + Returns: + A tuple (file_id, is_download_link), where file_id is the ID of the + file on Google Drive, and is_download_link is a flag indicating + whether the URL is a download link. 
""" parsed = urllib_parse.urlparse(url) query = urllib_parse.parse_qs(parsed.query) - is_gdrive = parsed.hostname in ["drive.google.com", "docs.google.com"] + is_gdrive = parsed.hostname in ("drive.google.com", "docs.google.com") is_download_link = parsed.path.endswith("/uc") if not is_gdrive: - return is_gdrive, is_download_link + return None, is_download_link - file_id = None - if "id" in query: - file_ids = query["id"] - if len(file_ids) == 1: - file_id = file_ids[0] - else: - patterns = [ + file_id = query.get("id", [None])[0] + if file_id is None: + for pattern in ( r"^/file/d/(.*?)/(edit|view)$", r"^/file/u/[0-9]+/d/(.*?)/(edit|view)$", r"^/document/d/(.*?)/(edit|htmlview|view)$", @@ -62,62 +59,56 @@ def parse_url(url, warning=True): r"^/presentation/u/[0-9]+/d/(.*?)/(edit|htmlview|view)$", r"^/spreadsheets/d/(.*?)/(edit|htmlview|view)$", r"^/spreadsheets/u/[0-9]+/d/(.*?)/(edit|htmlview|view)$", - ] - for pattern in patterns: + ): match = re.match(pattern, parsed.path) if match: - file_id = match.groups()[0] + file_id = match.group(1) break if warning and not is_download_link: warnings.warn( "You specified a Google Drive link that is not the correct link " "to download a file. You might want to try `--fuzzy` option " - "or the following url: {url}".format( - url="https://drive.google.com/uc?id={}".format(file_id) - ) + f"or the following url: https://drive.google.com/uc?id={file_id}" ) return file_id, is_download_link CHUNK_SIZE = 512 * 1024 # 512KB -home = osp.expanduser("~") +HOME = os.path.expanduser("~") def get_url_from_gdrive_confirmation(contents): - url = "" - m = re.search(r'href="(\/uc\?export=download[^"]+)', contents) - if m: - url = "https://docs.google.com" + m.groups()[0] - url = url.replace("&", "&") - return url - - m = re.search(r'href="/open\?id=([^"]+)"', contents) - if m: - url = m.groups()[0] - uuid = re.search( - r'(.*)

', contents) - if m: - error = m.groups()[0] + """Extract the download URL from a Google Drive confirmation page.""" + for pattern in ( + r'href="(\/uc\?export=download[^"]+)', + r'href="/open\?id=([^"]+)"', + r'"downloadUrl":"([^"]+)', + ): + match = re.search(pattern, contents) + if match: + url = match.group(1) + if pattern == r'href="/open\?id=([^"]+)"': + uuid = re.search( + r'(.*)

', contents) + if match: + error = match.group(1) raise FileURLRetrievalError(error) raise FileURLRetrievalError( @@ -128,8 +119,8 @@ def get_url_from_gdrive_confirmation(contents): def _get_session(proxy, use_cookies, return_cookies_file=False): + """Create a requests session with optional proxy and cookie handling.""" sess = requests.session() - sess.headers.update( {"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6)"} ) @@ -138,18 +129,14 @@ def _get_session(proxy, use_cookies, return_cookies_file=False): sess.proxies = {"http": proxy, "https": proxy} print("Using proxy:", proxy, file=sys.stderr) - # Load cookies if exists - cookies_file = osp.join(home, ".cache/gdown/cookies.json") - if osp.exists(cookies_file) and use_cookies: + cookies_file = os.path.join(HOME, ".cache/gdown/cookies.json") + if os.path.exists(cookies_file) and use_cookies: with open(cookies_file) as f: cookies = json.load(f) for k, v in cookies: sess.cookies[k] = v - if return_cookies_file: - return sess, cookies_file - else: - return sess + return (sess, cookies_file) if return_cookies_file else sess def download( @@ -206,7 +193,7 @@ def download( if not (id is None) ^ (url is None): raise ValueError("Either url or id has to be specified") if id is not None: - url = "https://drive.google.com/uc?id={id}".format(id=id) + url = f"https://drive.google.com/uc?id={id}" url_origin = url @@ -218,7 +205,7 @@ def download( if fuzzy and gdrive_file_id: # overwrite the url with fuzzy match of a file id - url = "https://drive.google.com/uc?id={id}".format(id=gdrive_file_id) + url = f"https://drive.google.com/uc?id={gdrive_file_id}" url_origin = url is_gdrive_download_link = True @@ -227,56 +214,32 @@ def download( if url == url_origin and res.status_code == 500: # The file could be Google Docs or Spreadsheets. 
- url = "https://drive.google.com/open?id={id}".format(id=gdrive_file_id) + url = f"https://drive.google.com/open?id={gdrive_file_id}" continue if res.headers["Content-Type"].startswith("text/html"): - m = re.search("(.+)", res.text) - if m and m.groups()[0].endswith(" - Google Docs"): - url = ( - "https://docs.google.com/document/d/{id}/export" - "?format={format}".format( - id=gdrive_file_id, - format="docx" if format is None else format, - ) - ) - continue - elif m and m.groups()[0].endswith(" - Google Sheets"): - url = ( - "https://docs.google.com/spreadsheets/d/{id}/export" - "?format={format}".format( - id=gdrive_file_id, - format="xlsx" if format is None else format, - ) - ) - continue - elif m and m.groups()[0].endswith(" - Google Slides"): - url = ( - "https://docs.google.com/presentation/d/{id}/export" - "?format={format}".format( - id=gdrive_file_id, - format="pptx" if format is None else format, - ) - ) - continue + title = re.search("(.+)", res.text) + if title: + title = title.group(1) + if title.endswith(" - Google Docs"): + url = f"https://docs.google.com/document/d/{gdrive_file_id}/export?format={'docx' if format is None else format}" + continue + if title.endswith(" - Google Sheets"): + url = f"https://docs.google.com/spreadsheets/d/{gdrive_file_id}/export?format={'xlsx' if format is None else format}" + continue + if title.endswith(" - Google Slides"): + url = f"https://docs.google.com/presentation/d/{gdrive_file_id}/export?format={'pptx' if format is None else format}" + continue elif ( "Content-Disposition" in res.headers and res.headers["Content-Disposition"].endswith("pptx") - and format not in {None, "pptx"} + and format not in (None, "pptx") ): - url = ( - "https://docs.google.com/presentation/d/{id}/export" - "?format={format}".format( - id=gdrive_file_id, - format="pptx" if format is None else format, - ) - ) + url = f"https://docs.google.com/presentation/d/{gdrive_file_id}/export?format={'pptx' if format is None else format}" continue if 
use_cookies: - if not osp.exists(osp.dirname(cookies_file)): - os.makedirs(osp.dirname(cookies_file)) - # Save cookies + os.makedirs(os.path.dirname(cookies_file), exist_ok=True) with open(cookies_file, "w") as f: cookies = [ (k, v) @@ -296,53 +259,47 @@ def download( url = get_url_from_gdrive_confirmation(res.text) except FileURLRetrievalError as e: message = ( - "Failed to retrieve file url:\n\n{}\n\n" + "Failed to retrieve file url:\n\n" + "{}\n\n" "You may still be able to access the file from the browser:" - "\n\n\t{}\n\n" + f"\n\n\t{url_origin}\n\n" "but Gdown can't. Please check connections and permissions." - ).format( - indent("\n".join(textwrap.wrap(str(e))), prefix="\t"), - url_origin, - ) + ).format(indent("\n".join(textwrap.wrap(str(e))), prefix="\t")) raise FileURLRetrievalError(message) if gdrive_file_id and is_gdrive_download_link: - content_disposition = six.moves.urllib_parse.unquote( - res.headers["Content-Disposition"] - ) - - m = re.search(r"filename\*=UTF-8''(.*)", content_disposition) - if not m: - m = re.search(r'filename=["\']?(.*?)["\']?$', content_disposition) - filename_from_url = m.groups()[0] - filename_from_url = filename_from_url.replace(osp.sep, "_") + content_disposition = urllib_parse.unquote(res.headers["Content-Disposition"]) + filename_from_url = ( + re.search(r"filename\*=UTF-8''(.*)", content_disposition) + or re.search(r'filename=["\']?(.*?)["\']?$', content_disposition) + ).group(1) + filename_from_url = filename_from_url.replace(os.path.sep, "_") else: - filename_from_url = osp.basename(url) + filename_from_url = os.path.basename(url) - if output is None: - output = filename_from_url + output = output or filename_from_url output_is_path = isinstance(output, six.string_types) - if output_is_path and output.endswith(osp.sep): - if not osp.exists(output): - os.makedirs(output) - output = osp.join(output, filename_from_url) + if output_is_path and output.endswith(os.path.sep): + os.makedirs(output, exist_ok=True) + output = 
os.path.join(output, filename_from_url) if output_is_path: - existing_tmp_files = [] - for file in os.listdir(osp.dirname(output) or "."): - if file.startswith(osp.basename(output)): - existing_tmp_files.append(osp.join(osp.dirname(output), file)) + temp_dir = os.path.dirname(output) or "." + prefix = os.path.basename(output) + existing_tmp_files = [ + os.path.join(temp_dir, file) + for file in os.listdir(temp_dir) + if file.startswith(prefix) + ] if resume and existing_tmp_files: - if len(existing_tmp_files) != 1: + if len(existing_tmp_files) > 1: print( "There are multiple temporary files to resume:", file=sys.stderr, ) - print("\n") for file in existing_tmp_files: - print("\t", file, file=sys.stderr) - print("\n") + print(f"\t{file}", file=sys.stderr) print( "Please remove them except one to resume downloading.", file=sys.stderr, @@ -351,12 +308,8 @@ def download( tmp_file = existing_tmp_files[0] else: resume = False - # mkstemp is preferred, but does not work on Windows - # https://github.com/wkentaro/gdown/issues/153 tmp_file = tempfile.mktemp( - suffix=tempfile.template, - prefix=osp.basename(output), - dir=osp.dirname(output), + suffix=tempfile.template, prefix=prefix, dir=temp_dir ) f = open(tmp_file, "ab") else: @@ -364,28 +317,20 @@ def download( f = output if tmp_file is not None and f.tell() != 0: - headers = {"Range": "bytes={}-".format(f.tell())} + headers = {"Range": f"bytes={f.tell()}-"} res = sess.get(url, headers=headers, stream=True, verify=verify) if not quiet: - # print("Downloading...", file=sys.stderr) if resume: print("Resume:", tmp_file, file=sys.stderr) - # if url_origin != url: - # print("From (original):", url_origin, file=sys.stderr) - # print("From (redirected):", url, file=sys.stderr) - # else: - # print("From:", url, file=sys.stderr) print( "To:", - osp.abspath(output) if output_is_path else output, + os.path.abspath(output) if output_is_path else output, file=sys.stderr, ) try: - total = res.headers.get("Content-Length") - if total 
is not None: - total = int(total) + total = int(res.headers.get("Content-Length", 0)) if not quiet: pbar = tqdm.tqdm(total=total, unit="B", unit_scale=True) t_start = time.time() diff --git a/rvc/lib/tools/launch_tensorboard.py b/rvc/lib/tools/launch_tensorboard.py index bae2219b3e579733d61f19021ff5c545f2981dfd..7f74e316762b737037f7b8e4448a1042553d5651 100644 --- a/rvc/lib/tools/launch_tensorboard.py +++ b/rvc/lib/tools/launch_tensorboard.py @@ -1,13 +1,18 @@ import time +import logging from tensorboard import program log_path = "logs" def launch_tensorboard_pipeline(): + logging.getLogger("root").setLevel(logging.WARNING) + logging.getLogger("tensorboard").setLevel(logging.WARNING) + tb = program.TensorBoard() tb.configure(argv=[None, "--logdir", log_path]) url = tb.launch() + print( f"Access the tensorboard using the following link:\n{url}?pinnedCards=%5B%7B%22plugin%22%3A%22scalars%22%2C%22tag%22%3A%22loss%2Fg%2Ftotal%22%7D%2C%7B%22plugin%22%3A%22scalars%22%2C%22tag%22%3A%22loss%2Fd%2Ftotal%22%7D%2C%7B%22plugin%22%3A%22scalars%22%2C%22tag%22%3A%22loss%2Fg%2Fkl%22%7D%2C%7B%22plugin%22%3A%22scalars%22%2C%22tag%22%3A%22loss%2Fg%2Fmel%22%7D%5D" ) diff --git a/rvc/lib/tools/model_download.py b/rvc/lib/tools/model_download.py index 2ee62728f8d4815a9bccc0dcaf18caad4812ccc0..ab1b136e265994ea879e501bab1c93eebff6f468 100644 --- a/rvc/lib/tools/model_download.py +++ b/rvc/lib/tools/model_download.py @@ -382,4 +382,4 @@ def model_download_pipeline(url: str): return "Error" except Exception as error: print(f"An unexpected error occurred: {error}") - return "Error" \ No newline at end of file + return "Error" diff --git a/rvc/lib/tools/prerequisites_download.py b/rvc/lib/tools/prerequisites_download.py index 08bd8b82dfb0e851ad6b2943c050e98e2f9b0db9..e38356212dedbc54bc4f964abbcd357ba5e19e10 100644 --- a/rvc/lib/tools/prerequisites_download.py +++ b/rvc/lib/tools/prerequisites_download.py @@ -4,6 +4,7 @@ from tqdm import tqdm import requests url_base = 
"https://huggingface.co/IAHispano/Applio/resolve/main/Resources" + pretraineds_v1_list = [ ( "pretrained_v1/", @@ -21,7 +22,7 @@ pretraineds_v1_list = [ "f0G40k.pth", "f0G48k.pth", ], - ), + ) ] pretraineds_v2_list = [ ( @@ -40,108 +41,120 @@ pretraineds_v2_list = [ "f0G40k.pth", "f0G48k.pth", ], - ), + ) ] - -models_list = [ - ( - "predictors/", - [ - "rmvpe.pt", - "fcpe.pt", - ], - ), +models_list = [("predictors/", ["rmvpe.pt", "fcpe.pt"])] +embedders_list = [("embedders/contentvec/", ["pytorch_model.bin", "config.json"])] +linux_executables_list = [("formant/", ["stftpitchshift"])] +executables_list = [ + ("", ["ffmpeg.exe", "ffprobe.exe"]), + ("formant/", ["stftpitchshift.exe"]), ] -embedders_list = [ - ( - "embedders/", - [ - "contentvec_base.pt", - ], - ), -] - - -executables_list = ["ffmpeg.exe", "ffprobe.exe"] - folder_mapping_list = { "pretrained_v1/": "rvc/models/pretraineds/pretrained_v1/", "pretrained_v2/": "rvc/models/pretraineds/pretrained_v2/", - "embedders/": "rvc/models/embedders/", + "embedders/contentvec/": "rvc/models/embedders/contentvec/", "predictors/": "rvc/models/predictors/", + "formant/": "rvc/models/formant/", } -def download_file(url, destination_path, desc): - if not os.path.exists(destination_path): - os.makedirs(os.path.dirname(destination_path) or ".", exist_ok=True) - response = requests.get(url, stream=True) - total_size = int(response.headers.get("content-length", 0)) - block_size = 1024 - t = tqdm(total=total_size, unit="iB", unit_scale=True, desc=desc) - with open(destination_path, "wb") as file: - for data in response.iter_content(block_size): - t.update(len(data)) - file.write(data) - t.close() - if total_size != 0 and t.n != total_size: - print("ERROR: Something went wrong during the download") - - -def download_files(file_list): - with ThreadPoolExecutor() as executor: - futures = [] - for file_name in file_list: - destination_path = os.path.join(file_name) - url = f"{url_base}/{file_name}" - futures.append( - 
executor.submit(download_file, url, destination_path, file_name) - ) - for future in futures: - future.result() - - -def download_mapping_files(list): +def get_file_size_if_missing(file_list): + """ + Calculate the total size of files to be downloaded only if they do not exist locally. + """ + total_size = 0 + for remote_folder, files in file_list: + local_folder = folder_mapping_list.get(remote_folder, "") + for file in files: + destination_path = os.path.join(local_folder, file) + if not os.path.exists(destination_path): + url = f"{url_base}/{remote_folder}{file}" + response = requests.head(url) + total_size += int(response.headers.get("content-length", 0)) + return total_size + + +def download_file(url, destination_path, global_bar): + """ + Download a file from the given URL to the specified destination path, + updating the global progress bar as data is downloaded. + """ + + dir_name = os.path.dirname(destination_path) + if dir_name: + os.makedirs(dir_name, exist_ok=True) + response = requests.get(url, stream=True) + block_size = 1024 + with open(destination_path, "wb") as file: + for data in response.iter_content(block_size): + file.write(data) + global_bar.update(len(data)) + + +def download_mapping_files(file_mapping_list, global_bar): + """ + Download all files in the provided file mapping list using a thread pool executor, + and update the global progress bar as downloads progress. 
+ """ with ThreadPoolExecutor() as executor: futures = [] - for remote_folder, file_list in list: + for remote_folder, file_list in file_mapping_list: local_folder = folder_mapping_list.get(remote_folder, "") for file in file_list: destination_path = os.path.join(local_folder, file) - url = f"{url_base}/{remote_folder}{file}" - futures.append( - executor.submit( - download_file, url, destination_path, f"{remote_folder}{file}" + if not os.path.exists(destination_path): + url = f"{url_base}/{remote_folder}{file}" + futures.append( + executor.submit( + download_file, url, destination_path, global_bar + ) ) - ) for future in futures: future.result() -def prequisites_download_pipeline(pretraineds_v1, pretraineds_v2, models, exe): - if models == True: - download_mapping_files(models_list) - download_mapping_files(embedders_list) - - if exe == True: - if os.name == "nt": - download_files(executables_list) - else: - print("Executable files are only available for Windows") - - if pretraineds_v1 == True: - download_mapping_files(pretraineds_v1_list) - - if pretraineds_v2 == True: - download_mapping_files(pretraineds_v2_list) +def calculate_total_size(pretraineds_v1, pretraineds_v2, models, exe): + """ + Calculate the total size of all files to be downloaded based on selected categories. 
+ """ + total_size = 0 + if models: + total_size += get_file_size_if_missing(models_list) + total_size += get_file_size_if_missing(embedders_list) + if exe: + total_size += get_file_size_if_missing( + executables_list if os.name == "nt" else linux_executables_list + ) + if pretraineds_v1: + total_size += get_file_size_if_missing(pretraineds_v1_list) + if pretraineds_v2: + total_size += get_file_size_if_missing(pretraineds_v2_list) + return total_size - # Clear the console after all downloads are completed - clear_console() - -def clear_console(): - if os.name == "nt": - os.system("cls") +def prequisites_download_pipeline(pretraineds_v1, pretraineds_v2, models, exe): + """ + Manage the download pipeline for different categories of files. + """ + total_size = calculate_total_size(pretraineds_v1, pretraineds_v2, models, exe) + + if total_size > 0: + with tqdm( + total=total_size, unit="iB", unit_scale=True, desc="Downloading all files" + ) as global_bar: + if models: + download_mapping_files(models_list, global_bar) + download_mapping_files(embedders_list, global_bar) + if exe: + download_mapping_files( + executables_list if os.name == "nt" else linux_executables_list, + global_bar, + ) + if pretraineds_v1: + download_mapping_files(pretraineds_v1_list, global_bar) + if pretraineds_v2: + download_mapping_files(pretraineds_v2_list, global_bar) else: - os.system("clear") + pass diff --git a/rvc/lib/utils.py b/rvc/lib/utils.py index 89fe5f7bff9ab4969cbb43975ce13d0a6615e083..429a17218b59d95f233363d50b9306de4b8391ab 100644 --- a/rvc/lib/utils.py +++ b/rvc/lib/utils.py @@ -4,16 +4,36 @@ import soundfile as sf import numpy as np import re import unicodedata -from fairseq import checkpoint_utils import wget +import subprocess +from pydub import AudioSegment +import tempfile +from torch import nn import logging +from transformers import HubertModel +import warnings -logging.getLogger("fairseq").setLevel(logging.WARNING) +# Remove this to see warnings about transformers models 
+warnings.filterwarnings("ignore") + +logging.getLogger("fairseq").setLevel(logging.ERROR) +logging.getLogger("faiss.loader").setLevel(logging.ERROR) +logging.getLogger("transformers").setLevel(logging.ERROR) +logging.getLogger("torch").setLevel(logging.ERROR) now_dir = os.getcwd() sys.path.append(now_dir) +base_path = os.path.join(now_dir, "rvc", "models", "formant", "stftpitchshift") +stft = base_path + ".exe" if sys.platform == "win32" else base_path + + +class HubertModelWithFinalProj(HubertModel): + def __init__(self, config): + super().__init__(config) + self.final_proj = nn.Linear(config.hidden_size, config.classifier_proj_size) + def load_audio(file, sample_rate): try: @@ -29,6 +49,53 @@ def load_audio(file, sample_rate): return audio.flatten() +def load_audio_infer( + file, sample_rate, formant_shifting, formant_qfrency, formant_timbre +): + try: + file = file.strip(" ").strip('"').strip("\n").strip('"').strip(" ") + if not os.path.isfile(file): + raise FileNotFoundError(f"File not found: {file}") + audio, sr = sf.read(file) + if len(audio.shape) > 1: + audio = librosa.to_mono(audio.T) + if sr != sample_rate: + audio = librosa.resample(audio, orig_sr=sr, target_sr=sample_rate) + if formant_shifting: + audio = (audio * 32767).astype(np.int16) + audio_segment = AudioSegment( + audio.tobytes(), + frame_rate=sample_rate, + sample_width=2, + channels=1, + ) + with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_file: + temp_file_path = temp_file.name + audio_segment.export(temp_file_path, format="wav") + + command = [ + stft, + "-i", + temp_file_path, + "-q", + str(formant_qfrency), + "-t", + str(formant_timbre), + "-o", + f"{temp_file_path}_formatted.wav", + ] + subprocess.run(command, shell=True) + formatted_audio_path = f"{temp_file_path}_formatted.wav" + audio, sr = sf.read(formatted_audio_path) + if len(audio.shape) > 1: + audio = librosa.to_mono(audio.T) + if sr != sample_rate: + audio = librosa.resample(audio, orig_sr=sr, 
target_sr=sample_rate) + except Exception as error: + raise RuntimeError(f"An error occurred loading the audio: {error}") + return audio.flatten() + + def format_title(title): formatted_title = ( unicodedata.normalize("NFKD", title).encode("ascii", "ignore").decode("utf-8") @@ -42,34 +109,45 @@ def format_title(title): def load_embedding(embedder_model, custom_embedder=None): embedder_root = os.path.join(now_dir, "rvc", "models", "embedders") embedding_list = { - "contentvec": os.path.join(embedder_root, "contentvec_base.pt"), - "japanese-hubert-base": os.path.join(embedder_root, "japanese-hubert-base.pt"), - "chinese-hubert-large": os.path.join(embedder_root, "chinese-hubert-large.pt"), + "contentvec": os.path.join(embedder_root, "contentvec"), + "chinese-hubert-base": os.path.join(embedder_root, "chinese_hubert_base"), + "japanese-hubert-base": os.path.join(embedder_root, "japanese_hubert_base"), + "korean-hubert-base": os.path.join(embedder_root, "korean_hubert_base"), } online_embedders = { - "japanese-hubert-base": "https://huggingface.co/rinna/japanese-hubert-base/resolve/main/fairseq/model.pt", - "chinese-hubert-large": "https://huggingface.co/TencentGameMate/chinese-hubert-large/resolve/main/chinese-hubert-large-fairseq-ckpt.pt", + "contentvec": "https://huggingface.co/IAHispano/Applio/resolve/main/Resources/embedders/contentvec/pytorch_model.bin", + "chinese-hubert-base": "https://huggingface.co/IAHispano/Applio/resolve/main/Resources/embedders/chinese_hubert_base/pytorch_model.bin", + "japanese-hubert-base": "https://huggingface.co/IAHispano/Applio/resolve/main/Resources/embedders/japanese_hubert_base/pytorch_model.bin", + "korean-hubert-base": "https://huggingface.co/IAHispano/Applio/resolve/main/Resources/embedders/korean_hubert_base/pytorch_model.bin", + } + + config_files = { + "contentvec": "https://huggingface.co/IAHispano/Applio/resolve/main/Resources/embedders/contentvec/config.json", + "chinese-hubert-base": 
"https://huggingface.co/IAHispano/Applio/resolve/main/Resources/embedders/chinese_hubert_base/config.json", + "japanese-hubert-base": "https://huggingface.co/IAHispano/Applio/resolve/main/Resources/embedders/japanese_hubert_base/config.json", + "korean-hubert-base": "https://huggingface.co/IAHispano/Applio/resolve/main/Resources/embedders/korean_hubert_base/config.json", } if embedder_model == "custom": - model_path = custom_embedder - if not custom_embedder and os.path.exists(custom_embedder): + if os.path.exists(custom_embedder): + model_path = custom_embedder + else: + print(f"Custom embedder not found: {custom_embedder}, using contentvec") model_path = embedding_list["contentvec"] else: model_path = embedding_list[embedder_model] - if embedder_model in online_embedders: - model_path = embedding_list[embedder_model] + bin_file = os.path.join(model_path, "pytorch_model.bin") + json_file = os.path.join(model_path, "config.json") + os.makedirs(model_path, exist_ok=True) + if not os.path.exists(bin_file): url = online_embedders[embedder_model] - print(f"\nDownloading {url} to {model_path}...") - wget.download(url, out=model_path) - else: - model_path = embedding_list["contentvec"] - - models = checkpoint_utils.load_model_ensemble_and_task( - [model_path], - suffix="", - ) + print(f"Downloading {url} to {model_path}...") + wget.download(url, out=bin_file) + if not os.path.exists(json_file): + url = config_files[embedder_model] + print(f"Downloading {url} to {model_path}...") + wget.download(url, out=json_file) - # print(f"Embedding model {embedder_model} loaded successfully.") + models = HubertModelWithFinalProj.from_pretrained(model_path) return models diff --git a/rvc/models/embedders/embedders_custom/.gitkeep b/rvc/models/embedders/embedders_custom/.gitkeep new file mode 100644 index 0000000000000000000000000000000000000000..8b137891791fe96927ad78e64b0aad7bded08bdc --- /dev/null +++ b/rvc/models/embedders/embedders_custom/.gitkeep @@ -0,0 +1 @@ + diff --git 
a/rvc/models/formant/.gitkeep b/rvc/models/formant/.gitkeep new file mode 100644 index 0000000000000000000000000000000000000000..8b137891791fe96927ad78e64b0aad7bded08bdc --- /dev/null +++ b/rvc/models/formant/.gitkeep @@ -0,0 +1 @@ + diff --git a/rvc/models/pretraineds/pretrained_v2/D32k.pth b/rvc/models/pretraineds/pretrained_v2/D32k.pth deleted file mode 100644 index b55340b0bc2875f148b245e781d534c5dd4f7894..0000000000000000000000000000000000000000 --- a/rvc/models/pretraineds/pretrained_v2/D32k.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:d8043378cc6619083d385f5a045de09b83fb3bf8de45c433ca863b71723ac3ca -size 142875703 diff --git a/rvc/models/pretraineds/pretrained_v2/D40k.pth b/rvc/models/pretraineds/pretrained_v2/D40k.pth deleted file mode 100644 index 6d13aea9208310573b59309a9c80310ef71c5547..0000000000000000000000000000000000000000 --- a/rvc/models/pretraineds/pretrained_v2/D40k.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:471378e894e7191f89a94eda8288c5947b16bbe0b10c3f1f17efdb7a1d998242 -size 142875703 diff --git a/rvc/models/pretraineds/pretrained_v2/D48k.pth b/rvc/models/pretraineds/pretrained_v2/D48k.pth deleted file mode 100644 index 2f3b46950856921425d3dd2a0857ce14fc7dc622..0000000000000000000000000000000000000000 --- a/rvc/models/pretraineds/pretrained_v2/D48k.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:db01094a93c09868a278e03dafe8bb781bfcc1a5ba8df168c948bf9168c84d82 -size 142875703 diff --git a/rvc/models/pretraineds/pretrained_v2/G32k.pth b/rvc/models/pretraineds/pretrained_v2/G32k.pth deleted file mode 100644 index d34c78904a383344a31fa9814782e55d82d852ae..0000000000000000000000000000000000000000 --- a/rvc/models/pretraineds/pretrained_v2/G32k.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:869b26a47f75168d6126f64ac39e6de5247017a8658cfd68aca600f7323efb9f -size 73811639 diff 
--git a/rvc/models/pretraineds/pretrained_v2/G40k.pth b/rvc/models/pretraineds/pretrained_v2/G40k.pth deleted file mode 100644 index ee39bf64a1fc1d0d8154e242a3b60ef3e2abf0ca..0000000000000000000000000000000000000000 --- a/rvc/models/pretraineds/pretrained_v2/G40k.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:a3843da7fde33db1dab176146c70d6c2df06eafe9457f4e3aa10024e9c6a4b69 -size 72959671 diff --git a/rvc/models/pretraineds/pretrained_v2/G48k.pth b/rvc/models/pretraineds/pretrained_v2/G48k.pth deleted file mode 100644 index 71fb5f1037137dae5ae4a5786c452197f3eabab1..0000000000000000000000000000000000000000 --- a/rvc/models/pretraineds/pretrained_v2/G48k.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:2e2b1581a436d07a76b10b9d38765f64aa02836dc65c7dee1ce4140c11ea158b -size 75318967 diff --git a/rvc/models/pretraineds/pretrained_v2/f0D32k.pth b/rvc/models/pretraineds/pretrained_v2/f0D32k.pth deleted file mode 100644 index 7449cc83c3f97fd87c62596f005d262120b64772..0000000000000000000000000000000000000000 --- a/rvc/models/pretraineds/pretrained_v2/f0D32k.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:bd7134e7793674c85474d5145d2d982e3c5d8124fc7bb6c20f710ed65808fa8a -size 142875703 diff --git a/rvc/models/pretraineds/pretrained_v2/f0D40k.pth b/rvc/models/pretraineds/pretrained_v2/f0D40k.pth deleted file mode 100644 index 68e29fb9debf8994f68cb4f31cc4d81d360fb73b..0000000000000000000000000000000000000000 --- a/rvc/models/pretraineds/pretrained_v2/f0D40k.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:6b6ab091e70801b28e3f41f335f2fc5f3f35c75b39ae2628d419644ec2b0fa09 -size 142875703 diff --git a/rvc/models/pretraineds/pretrained_v2/f0D48k.pth b/rvc/models/pretraineds/pretrained_v2/f0D48k.pth deleted file mode 100644 index a9f2b74cdccf9bfc2ebb0be8b481c76552a28661..0000000000000000000000000000000000000000 --- 
a/rvc/models/pretraineds/pretrained_v2/f0D48k.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:2269b73c7a4cf34da09aea99274dabf99b2ddb8a42cbfb065fb3c0aa9a2fc748 -size 142875703 diff --git a/rvc/models/pretraineds/pretrained_v2/f0G32k.pth b/rvc/models/pretraineds/pretrained_v2/f0G32k.pth deleted file mode 100644 index 6bff869bb04fae663dddd1fbe5c04b497bfc4209..0000000000000000000000000000000000000000 --- a/rvc/models/pretraineds/pretrained_v2/f0G32k.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:2332611297b8d88c7436de8f17ef5f07a2119353e962cd93cda5806d59a1133d -size 73950049 diff --git a/rvc/models/pretraineds/pretrained_v2/f0G40k.pth b/rvc/models/pretraineds/pretrained_v2/f0G40k.pth deleted file mode 100644 index 3333a970abc908b4e8afd45c1cc2120ce1b4b0b4..0000000000000000000000000000000000000000 --- a/rvc/models/pretraineds/pretrained_v2/f0G40k.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:3b2c44035e782c4b14ddc0bede9e2f4a724d025cd073f736d4f43708453adfcb -size 73106273 diff --git a/rvc/models/pretraineds/pretrained_v2/f0G48k.pth b/rvc/models/pretraineds/pretrained_v2/f0G48k.pth deleted file mode 100644 index 34d1ec0e2594379989cad7b72991152a9f97d99e..0000000000000000000000000000000000000000 --- a/rvc/models/pretraineds/pretrained_v2/f0G48k.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:b5d51f589cc3632d4eae36a315b4179397695042edc01d15312e1bddc2b764a4 -size 75465569 diff --git a/rvc/models/pretraineds/pretraineds_custom/.gitkeep b/rvc/models/pretraineds/pretraineds_custom/.gitkeep new file mode 100644 index 0000000000000000000000000000000000000000..8b137891791fe96927ad78e64b0aad7bded08bdc --- /dev/null +++ b/rvc/models/pretraineds/pretraineds_custom/.gitkeep @@ -0,0 +1 @@ + diff --git a/rvc/train/data_utils.py b/rvc/train/data_utils.py index 
84dd4f176234864b90270138bb390d4ae7e2465c..2013558ff4e662dc68ef815eabcaa2ea37aa72a1 100644 --- a/rvc/train/data_utils.py +++ b/rvc/train/data_utils.py @@ -118,7 +118,7 @@ class TextAudioLoaderMultiNSFsid(torch.utils.data.Dataset): audio, sample_rate = load_wav_to_torch(filename) if sample_rate != self.sample_rate: raise ValueError( - "{} SR doesn't match target {} SR".format(sample_rate, self.sample_rate) + f"{sample_rate} SR doesn't match target {self.sample_rate} SR" ) audio_norm = audio audio_norm = audio_norm.unsqueeze(0) @@ -342,7 +342,7 @@ class TextAudioLoader(torch.utils.data.Dataset): audio, sample_rate = load_wav_to_torch(filename) if sample_rate != self.sample_rate: raise ValueError( - "{} SR doesn't match target {} SR".format(sample_rate, self.sample_rate) + f"{sample_rate} SR doesn't match target {self.sample_rate} SR" ) audio_norm = audio audio_norm = audio_norm.unsqueeze(0) diff --git a/rvc/train/extract/embedding_extractor.py b/rvc/train/extract/embedding_extractor.py deleted file mode 100644 index 9bf7349d117a25f9506d7d1d2182e3eff92e2502..0000000000000000000000000000000000000000 --- a/rvc/train/extract/embedding_extractor.py +++ /dev/null @@ -1,168 +0,0 @@ -import os -import sys -import tqdm -import torch -import torch.nn.functional as F -import soundfile as sf -import numpy as np -import time - -now_dir = os.getcwd() -sys.path.append(now_dir) - -from rvc.lib.utils import load_embedding -from rvc.configs.config import Config - -config = Config() - - -def setup_paths(exp_dir: str, version: str): - """Set up input and output paths.""" - wav_path = os.path.join(exp_dir, "sliced_audios_16k") - out_path = os.path.join( - exp_dir, "v1_extracted" if version == "v1" else "v2_extracted" - ) - os.makedirs(out_path, exist_ok=True) - return wav_path, out_path - - -def read_wave(wav_path: str, normalize: bool = False): - """Read a wave file and return its features.""" - wav, sr = sf.read(wav_path) - assert sr == 16000, "Sample rate must be 16000" - - feats = 
torch.from_numpy(wav) - feats = feats.half() if config.is_half else feats.float() - feats = feats.mean(-1) if feats.dim() == 2 else feats - feats = feats.view(1, -1) - - if normalize: - with torch.no_grad(): - feats = F.layer_norm(feats, feats.shape) - return feats - - -def process_file( - file: str, - wav_path: str, - out_path: str, - model: torch.nn.Module, - device: str, - version: str, - saved_cfg: Config, -): - """Process a single audio file.""" - wav_file_path = os.path.join(wav_path, file) - out_file_path = os.path.join(out_path, file.replace("wav", "npy")) - - if os.path.exists(out_file_path): - return - - # Load and prepare features - feats = read_wave(wav_file_path, normalize=saved_cfg.task.normalize) - - # Adjust dtype based on the device - dtype = torch.float16 if device.startswith("cuda") else torch.float32 - feats = feats.to(dtype).to(device) - - padding_mask = torch.BoolTensor(feats.shape).fill_(False).to(dtype).to(device) - - inputs = { - "source": feats, - "padding_mask": padding_mask, - "output_layer": 9 if version == "v1" else 12, - } - - with torch.no_grad(): - model = model.to(device).to(dtype) - - logits = model.extract_features(**inputs) - feats = model.final_proj(logits[0]) if version == "v1" else logits[0] - - feats = feats.squeeze(0).float().cpu().numpy() - if not np.isnan(feats).any(): - np.save(out_file_path, feats, allow_pickle=False) - else: - print(f"{file} contains NaN values and will be skipped.") - - -def main(): - """Main function to orchestrate the feature extraction process.""" - try: - exp_dir = str(sys.argv[1]) - version = str(sys.argv[2]) - gpus = str(sys.argv[3]) - embedder_model = str(sys.argv[4]) - embedder_model_custom = str(sys.argv[5]) if len(sys.argv) > 5 else None - - os.environ["CUDA_VISIBLE_DEVICES"] = gpus.replace("-", ",") - except IndexError: - print("Invalid arguments provided.") - sys.exit(1) - - wav_path, out_path = setup_paths(exp_dir, version) - - print("Starting feature extraction...") - start_time = 
time.time() - - models, saved_cfg, task = load_embedding(embedder_model, embedder_model_custom) - model = models[0] - - gpus = gpus.split("-") if gpus != "-" else ["cpu"] - - devices = [] - for gpu in gpus: - try: - if gpu != "cpu": - index = int(gpu) - if index < torch.cuda.device_count(): - devices.append(f"cuda:{index}") - else: - print( - f"Oops, there was an issue initializing GPU. Maybe you don't have a GPU? No worries, switching to CPU for now." - ) - devices.append("cpu") - else: - devices.append("cpu") - except ValueError: - f"Oops, there was an issue initializing GPU. Maybe you don't have a GPU? No worries, switching to CPU for now." - devices.append("cpu") - - paths = sorted(os.listdir(wav_path)) - if not paths: - print("No audio files found. Make sure you have provided the audios correctly.") - sys.exit(1) - - pbar = tqdm.tqdm(total=len(paths), desc="Embedding Extraction") - - # Create a list of tasks to be processed - tasks = [ - ( - file, - wav_path, - out_path, - model, - device, - version, - saved_cfg, - ) - for file in paths - if file.endswith(".wav") - for device in devices - ] - - # Process files - for task in tasks: - try: - process_file(*task) - except Exception as error: - print(f"An error occurred processing {task[0]}: {error}") - pbar.update(1) - - pbar.close() - elapsed_time = time.time() - start_time - print(f"Embedding extraction completed in {elapsed_time:.2f} seconds.") - - -if __name__ == "__main__": - main() \ No newline at end of file diff --git a/rvc/train/extract/extract.py b/rvc/train/extract/extract.py new file mode 100644 index 0000000000000000000000000000000000000000..606b461532d0b0d2a096884ce83c2986e598d73e --- /dev/null +++ b/rvc/train/extract/extract.py @@ -0,0 +1,286 @@ +import os +import sys +import glob +import time +import tqdm +import torch +import torchcrepe +import numpy as np +import concurrent.futures +import multiprocessing as mp + +# Zluda +if torch.cuda.is_available() and 
torch.cuda.get_device_name().endswith("[ZLUDA]"): + torch.backends.cudnn.enabled = False + torch.backends.cuda.enable_flash_sdp(False) + torch.backends.cuda.enable_math_sdp(True) + torch.backends.cuda.enable_mem_efficient_sdp(False) + +now_dir = os.getcwd() +sys.path.append(os.path.join(now_dir)) + +from rvc.lib.utils import load_audio, load_embedding +from rvc.train.extract.preparing_files import generate_config, generate_filelist +from rvc.lib.predictors.RMVPE import RMVPE0Predictor +from rvc.configs.config import Config + +# Load config +config = Config() + +mp.set_start_method("spawn", force=True) + + +class FeatureInput: + """Class for F0 extraction.""" + + def __init__(self, sample_rate=16000, hop_size=160, device="cpu"): + self.fs = sample_rate + self.hop = hop_size + self.f0_bin = 256 + self.f0_max = 1100.0 + self.f0_min = 50.0 + self.f0_mel_min = 1127 * np.log(1 + self.f0_min / 700) + self.f0_mel_max = 1127 * np.log(1 + self.f0_max / 700) + self.device = device + self.model_rmvpe = None + + def compute_f0(self, np_arr, f0_method, hop_length): + """Extract F0 using the specified method.""" + if f0_method == "crepe": + return self.get_crepe(np_arr, hop_length) + elif f0_method == "rmvpe": + return self.model_rmvpe.infer_from_audio(np_arr, thred=0.03) + else: + raise ValueError(f"Unknown F0 method: {f0_method}") + + def get_crepe(self, x, hop_length): + """Extract F0 using CREPE.""" + audio = torch.from_numpy(x.astype(np.float32)).to(self.device) + audio /= torch.quantile(torch.abs(audio), 0.999) + audio = audio.unsqueeze(0) + pitch = torchcrepe.predict( + audio, + self.fs, + hop_length, + self.f0_min, + self.f0_max, + "full", + batch_size=hop_length * 2, + device=audio.device, + pad=True, + ) + source = pitch.squeeze(0).cpu().float().numpy() + source[source < 0.001] = np.nan + target = np.interp( + np.arange(0, len(source) * (x.size // self.hop), len(source)) + / (x.size // self.hop), + np.arange(0, len(source)), + source, + ) + return np.nan_to_num(target) 
+ + def coarse_f0(self, f0): + """Convert F0 to coarse F0.""" + f0_mel = 1127 * np.log(1 + f0 / 700) + f0_mel = np.clip( + (f0_mel - self.f0_mel_min) + * (self.f0_bin - 2) + / (self.f0_mel_max - self.f0_mel_min) + + 1, + 1, + self.f0_bin - 1, + ) + return np.rint(f0_mel).astype(int) + + def process_file(self, file_info, f0_method, hop_length): + """Process a single audio file for F0 extraction.""" + inp_path, opt_path1, opt_path2, _ = file_info + + if os.path.exists(opt_path1) and os.path.exists(opt_path2): + return + + try: + np_arr = load_audio(inp_path, 16000) + feature_pit = self.compute_f0(np_arr, f0_method, hop_length) + np.save(opt_path2, feature_pit, allow_pickle=False) + coarse_pit = self.coarse_f0(feature_pit) + np.save(opt_path1, coarse_pit, allow_pickle=False) + except Exception as error: + print( + f"An error occurred extracting file {inp_path} on {self.device}: {error}" + ) + + def process_files( + self, files, f0_method, hop_length, device_num, device, n_threads + ): + """Process multiple files.""" + self.device = device + if f0_method == "rmvpe": + self.model_rmvpe = RMVPE0Predictor( + os.path.join("rvc", "models", "predictors", "rmvpe.pt"), + is_half=False, + device=device, + ) + else: + n_threads = 1 + + n_threads = 1 if n_threads == 0 else n_threads + + def process_file_wrapper(file_info): + self.process_file(file_info, f0_method, hop_length) + + with tqdm.tqdm(total=len(files), leave=True, position=device_num) as pbar: + # using multi-threading + with concurrent.futures.ThreadPoolExecutor( + max_workers=n_threads + ) as executor: + futures = [ + executor.submit(process_file_wrapper, file_info) + for file_info in files + ] + for future in concurrent.futures.as_completed(futures): + pbar.update(1) + + +def run_pitch_extraction(files, devices, f0_method, hop_length, num_processes): + devices_str = ", ".join(devices) + print( + f"Starting pitch extraction with {num_processes} cores on {devices_str} using {f0_method}..." 
+ ) + start_time = time.time() + fe = FeatureInput() + # split the task between devices + ps = [] + num_devices = len(devices) + for i, device in enumerate(devices): + p = mp.Process( + target=fe.process_files, + args=( + files[i::num_devices], + f0_method, + hop_length, + i, + device, + num_processes // num_devices, + ), + ) + ps.append(p) + p.start() + for i, device in enumerate(devices): + ps[i].join() + + elapsed_time = time.time() - start_time + print(f"Pitch extraction completed in {elapsed_time:.2f} seconds.") + + +def process_file_embedding( + files, version, embedder_model, embedder_model_custom, device_num, device, n_threads +): + dtype = torch.float16 if config.is_half and "cuda" in device else torch.float32 + model = load_embedding(embedder_model, embedder_model_custom).to(dtype).to(device) + n_threads = 1 if n_threads == 0 else n_threads + + def process_file_embedding_wrapper(file_info): + wav_file_path, _, _, out_file_path = file_info + if os.path.exists(out_file_path): + return + feats = torch.from_numpy(load_audio(wav_file_path, 16000)).to(dtype).to(device) + feats = feats.view(1, -1) + with torch.no_grad(): + feats = model(feats)["last_hidden_state"] + feats = ( + model.final_proj(feats[0]).unsqueeze(0) if version == "v1" else feats + ) + feats = feats.squeeze(0).float().cpu().numpy() + if not np.isnan(feats).any(): + np.save(out_file_path, feats, allow_pickle=False) + else: + print(f"{file} contains NaN values and will be skipped.") + + with tqdm.tqdm(total=len(files), leave=True, position=device_num) as pbar: + # using multi-threading + with concurrent.futures.ThreadPoolExecutor(max_workers=n_threads) as executor: + futures = [ + executor.submit(process_file_embedding_wrapper, file_info) + for file_info in files + ] + for future in concurrent.futures.as_completed(futures): + pbar.update(1) + + +def run_embedding_extraction( + files, devices, version, embedder_model, embedder_model_custom +): + start_time = time.time() + devices_str = ", 
".join(devices) + print( + f"Starting embedding extraction with {num_processes} cores on {devices_str}..." + ) + # split the task between devices + ps = [] + num_devices = len(devices) + for i, device in enumerate(devices): + p = mp.Process( + target=process_file_embedding, + args=( + files[i::num_devices], + version, + embedder_model, + embedder_model_custom, + i, + device, + num_processes // num_devices, + ), + ) + ps.append(p) + p.start() + for i, device in enumerate(devices): + ps[i].join() + elapsed_time = time.time() - start_time + print(f"Embedding extraction completed in {elapsed_time:.2f} seconds.") + + +if __name__ == "__main__": + + exp_dir = sys.argv[1] + f0_method = sys.argv[2] + hop_length = int(sys.argv[3]) + num_processes = int(sys.argv[4]) + gpus = sys.argv[5] + version = sys.argv[6] + pitch_guidance = sys.argv[7] + sample_rate = sys.argv[8] + embedder_model = sys.argv[9] + embedder_model_custom = sys.argv[10] if len(sys.argv) > 10 else None + + # prep + wav_path = os.path.join(exp_dir, "sliced_audios_16k") + os.makedirs(os.path.join(exp_dir, "f0"), exist_ok=True) + os.makedirs(os.path.join(exp_dir, "f0_voiced"), exist_ok=True) + os.makedirs(os.path.join(exp_dir, version + "_extracted"), exist_ok=True) + + files = [] + for file in glob.glob(os.path.join(wav_path, "*.wav")): + file_name = os.path.basename(file) + file_info = [ + file, # full path to sliced 16k wav + os.path.join(exp_dir, "f0", file_name + ".npy"), + os.path.join(exp_dir, "f0_voiced", file_name + ".npy"), + os.path.join( + exp_dir, version + "_extracted", file_name.replace("wav", "npy") + ), + ] + files.append(file_info) + + devices = ["cpu"] if gpus == "-" else [f"cuda:{idx}" for idx in gpus.split("-")] + # Run Pitch Extraction + run_pitch_extraction(files, devices, f0_method, hop_length, num_processes) + + # Run Embedding Extraction + run_embedding_extraction( + files, devices, version, embedder_model, embedder_model_custom + ) + + # Run Preparing Files + generate_config(version, 
sample_rate, exp_dir) + generate_filelist(pitch_guidance, exp_dir, version, sample_rate) diff --git a/rvc/train/extract/pitch_extractor.py b/rvc/train/extract/pitch_extractor.py deleted file mode 100644 index 8da0a9c4e9b8d268dcf895ac877f96526dcf1ce2..0000000000000000000000000000000000000000 --- a/rvc/train/extract/pitch_extractor.py +++ /dev/null @@ -1,193 +0,0 @@ -import os -import sys -import time -import tqdm -import torch -import torchcrepe -import numpy as np -from multiprocessing import Pool -from functools import partial - -current_directory = os.getcwd() -sys.path.append(current_directory) - -from rvc.lib.utils import load_audio -from rvc.lib.predictors.RMVPE import RMVPE0Predictor - -# Parse command line arguments -exp_dir = str(sys.argv[1]) -f0_method = str(sys.argv[2]) -hop_length = int(sys.argv[3]) -num_processes = int(sys.argv[4]) -gpus = str(sys.argv[5]) # - = Use CPU - -os.environ["CUDA_VISIBLE_DEVICES"] = gpus.replace("-", ",") - - -class FeatureInput: - """Class for F0 extraction.""" - - def __init__(self, sample_rate=16000, hop_size=160, device="cpu"): - self.fs = sample_rate - self.hop = hop_size - self.f0_bin = 256 - self.f0_max = 1100.0 - self.f0_min = 50.0 - self.f0_mel_min = 1127 * np.log(1 + self.f0_min / 700) - self.f0_mel_max = 1127 * np.log(1 + self.f0_max / 700) - self.device = device - self.model_rmvpe = RMVPE0Predictor( - os.path.join("rvc", "models", "predictors", "rmvpe.pt"), - is_half=False, - device=device, - ) - - def compute_f0(self, np_arr, f0_method, hop_length): - """Extract F0 using the specified method.""" - p_len = np_arr.shape[0] // self.hop - - if f0_method == "crepe": - f0 = self.get_crepe(np_arr, p_len, hop_length) - elif f0_method == "rmvpe": - f0 = self.model_rmvpe.infer_from_audio(np_arr, thred=0.03) - else: - raise ValueError(f"Unknown F0 method: {f0_method}") - - return f0 - - def get_crepe(self, x, p_len, hop_length): - """Extract F0 using CREPE.""" - audio = torch.from_numpy(x.astype(np.float32)).to(self.device) 
- audio /= torch.quantile(torch.abs(audio), 0.999) - audio = torch.unsqueeze(audio, dim=0) - - pitch = torchcrepe.predict( - audio, - self.fs, - hop_length, - self.f0_min, - self.f0_max, - "full", - batch_size=hop_length * 2, - device=self.device, - pad=True, - ) - - source = pitch.squeeze(0).cpu().float().numpy() - source[source < 0.001] = np.nan - target = np.interp( - np.arange(0, len(source) * p_len, len(source)) / p_len, - np.arange(0, len(source)), - source, - ) - return np.nan_to_num(target) - - def coarse_f0(self, f0): - """Convert F0 to coarse F0.""" - f0_mel = 1127 * np.log(1 + f0 / 700) - f0_mel[f0_mel > 0] = (f0_mel[f0_mel > 0] - self.f0_mel_min) * ( - self.f0_bin - 2 - ) / (self.f0_mel_max - self.f0_mel_min) + 1 - f0_mel[f0_mel <= 1] = 1 - f0_mel[f0_mel > self.f0_bin - 1] = self.f0_bin - 1 - f0_coarse = np.rint(f0_mel).astype(int) - assert f0_coarse.max() <= 255 and f0_coarse.min() >= 1, ( - f0_coarse.max(), - f0_coarse.min(), - ) - return f0_coarse - - def process_file(self, file_info, f0_method, hop_length): - """Process a single audio file for F0 extraction.""" - inp_path, opt_path1, opt_path2, np_arr = file_info - - if os.path.exists(opt_path1 + ".npy") and os.path.exists(opt_path2 + ".npy"): - return - - try: - feature_pit = self.compute_f0(np_arr, f0_method, hop_length) - np.save(opt_path2, feature_pit, allow_pickle=False) - coarse_pit = self.coarse_f0(feature_pit) - np.save(opt_path1, coarse_pit, allow_pickle=False) - except Exception as error: - print(f"An error occurred extracting file {inp_path}: {error}") - - def process_files(self, files, f0_method, hop_length, pbar): - """Process multiple files.""" - for file_info in files: - self.process_file(file_info, f0_method, hop_length) - pbar.update() - - -def main(exp_dir, f0_method, hop_length, num_processes, gpus): - paths = [] - input_root = os.path.join(exp_dir, "sliced_audios_16k") - output_root1 = os.path.join(exp_dir, "f0") - output_root2 = os.path.join(exp_dir, "f0_voiced") - - 
os.makedirs(output_root1, exist_ok=True) - os.makedirs(output_root2, exist_ok=True) - - for name in sorted(os.listdir(input_root)): - if "spec" in name: - continue - input_path = os.path.join(input_root, name) - output_path1 = os.path.join(output_root1, name) - output_path2 = os.path.join(output_root2, name) - np_arr = load_audio(input_path, 16000) - paths.append([input_path, output_path1, output_path2, np_arr]) - - print(f"Starting extraction with {num_processes} cores and {f0_method}...") - - start_time = time.time() - - if gpus != "-": - gpus = gpus.split("-") - num_gpus = len(gpus) - process_partials = [] - pbar = tqdm.tqdm(total=len(paths), desc="Pitch Extraction") - - for idx, gpu in enumerate(gpus): - device = f"cuda:{gpu}" - if torch.cuda.is_available() and torch.cuda.device_count() > idx: - try: - feature_input = FeatureInput(device=device) - part_paths = paths[idx::num_gpus] - process_partials.append((feature_input, part_paths)) - except Exception as error: - print( - f"Oops, there was an issue initializing GPU {device} ({error}). Maybe you don't have a GPU? No worries, switching to CPU for now." - ) - feature_input = FeatureInput(device="cpu") - part_paths = paths[idx::num_gpus] - process_partials.append((feature_input, part_paths)) - else: - print(f"GPU {device} is not available. 
Switching to CPU.") - feature_input = FeatureInput(device="cpu") - part_paths = paths[idx::num_gpus] - process_partials.append((feature_input, part_paths)) - - # Process each part with the corresponding GPU or CPU - for feature_input, part_paths in process_partials: - feature_input.process_files(part_paths, f0_method, hop_length, pbar) - pbar.close() - - else: - # Use multiprocessing Pool for parallel processing with progress bar - feature_input = FeatureInput(device="cpu") - with tqdm.tqdm(total=len(paths), desc="Pitch Extraction") as pbar: - pool = Pool(processes=num_processes) - process_file_partial = partial( - feature_input.process_file, f0_method=f0_method, hop_length=hop_length - ) - for _ in pool.imap_unordered(process_file_partial, paths): - pbar.update() - pool.close() - pool.join() - - elapsed_time = time.time() - start_time - print(f"Pitch extraction completed in {elapsed_time:.2f} seconds.") - - -if __name__ == "__main__": - main(exp_dir, f0_method, hop_length, num_processes, gpus) \ No newline at end of file diff --git a/rvc/train/extract/preparing_files.py b/rvc/train/extract/preparing_files.py index 71f10bec7e4f4129c6223f7867dfddcbeb677b74..3956eb0c898ab1fc1c1e998689a3a9ce34830db3 100644 --- a/rvc/train/extract/preparing_files.py +++ b/rvc/train/extract/preparing_files.py @@ -1,7 +1,6 @@ import os import shutil from random import shuffle - from rvc.configs.config import Config config = Config() @@ -18,43 +17,53 @@ def generate_config(rvc_version: str, sample_rate: int, model_path: str): def generate_filelist( pitch_guidance: bool, model_path: str, rvc_version: str, sample_rate: int ): - gt_wavs_dir = f"{model_path}/sliced_audios" - feature_dir = ( - f"{model_path}/v1_extracted" - if rvc_version == "v1" - else f"{model_path}/v2_extracted" - ) - if pitch_guidance == True: - f0_dir = f"{model_path}/f0" - f0nsf_dir = f"{model_path}/f0_voiced" - names = ( - set([name.split(".")[0] for name in os.listdir(gt_wavs_dir)]) - & set([name.split(".")[0] for name 
in os.listdir(feature_dir)]) - & set([name.split(".")[0] for name in os.listdir(f0_dir)]) - & set([name.split(".")[0] for name in os.listdir(f0nsf_dir)]) - ) - elif pitch_guidance == False: - names = set([name.split(".")[0] for name in os.listdir(gt_wavs_dir)]) & set( - [name.split(".")[0] for name in os.listdir(feature_dir)] - ) + gt_wavs_dir = os.path.join(model_path, "sliced_audios") + feature_dir = os.path.join(model_path, f"{rvc_version}_extracted") + + f0_dir, f0nsf_dir = None, None + if pitch_guidance: + f0_dir = os.path.join(model_path, "f0") + f0nsf_dir = os.path.join(model_path, "f0_voiced") + + gt_wavs_files = set(name.split(".")[0] for name in os.listdir(gt_wavs_dir)) + feature_files = set(name.split(".")[0] for name in os.listdir(feature_dir)) + + if pitch_guidance: + f0_files = set(name.split(".")[0] for name in os.listdir(f0_dir)) + f0nsf_files = set(name.split(".")[0] for name in os.listdir(f0nsf_dir)) + names = gt_wavs_files & feature_files & f0_files & f0nsf_files + else: + names = gt_wavs_files & feature_files + options = [] + mute_base_path = os.path.join(current_directory, "logs", "mute") + for name in names: - if pitch_guidance == 1: + if pitch_guidance: options.append( f"{gt_wavs_dir}/{name}.wav|{feature_dir}/{name}.npy|{f0_dir}/{name}.wav.npy|{f0nsf_dir}/{name}.wav.npy|0" ) else: options.append(f"{gt_wavs_dir}/{name}.wav|{feature_dir}/{name}.npy|0") - if pitch_guidance == 1: - for _ in range(2): - options.append( - f"{current_directory}/logs/mute/sliced_audios/mute{sample_rate}.wav|{current_directory}/logs/mute/{rvc_version}_extracted/mute.npy|{current_directory}/logs/mute/f0/mute.wav.npy|{current_directory}/logs/mute/f0_voiced/mute.wav.npy|0" - ) - else: - for _ in range(2): + + mute_audio_path = os.path.join( + mute_base_path, "sliced_audios", f"mute{sample_rate}.wav" + ) + mute_feature_path = os.path.join( + mute_base_path, f"{rvc_version}_extracted", "mute.npy" + ) + + for _ in range(2): + if pitch_guidance: + mute_f0_path = 
os.path.join(mute_base_path, "f0", "mute.wav.npy") + mute_f0nsf_path = os.path.join(mute_base_path, "f0_voiced", "mute.wav.npy") options.append( - f"{current_directory}/logs/mute/sliced_audios/mute{sample_rate}.wav|{current_directory}/logs/mute/{rvc_version}_extracted/mute.npy|0" + f"{mute_audio_path}|{mute_feature_path}|{mute_f0_path}|{mute_f0nsf_path}|0" ) + else: + options.append(f"{mute_audio_path}|{mute_feature_path}|0") + shuffle(options) - with open(f"{model_path}/filelist.txt", "w") as f: + + with open(os.path.join(model_path, "filelist.txt"), "w") as f: f.write("\n".join(options)) diff --git a/rvc/train/mel_processing.py b/rvc/train/mel_processing.py index ecad64896e9a858524bafcee60c8bae930b71622..1f5e8f38b72f0cd9ec89ba0b725860f512d2f607 100644 --- a/rvc/train/mel_processing.py +++ b/rvc/train/mel_processing.py @@ -76,6 +76,12 @@ def spectrogram_torch(y, n_fft, hop_size, win_size, center=False): ) y = y.squeeze(1) + # Zluda, fall-back to CPU for FFTs since HIP SDK has no cuFFT alternative + source_device = y.device + if y.device.type == "cuda" and torch.cuda.get_device_name().endswith("[ZLUDA]"): + y = y.to("cpu") + hann_window[wnsize_dtype_device] = hann_window[wnsize_dtype_device].to("cpu") + spec = torch.stft( y, n_fft, @@ -87,7 +93,7 @@ def spectrogram_torch(y, n_fft, hop_size, win_size, center=False): normalized=False, onesided=True, return_complex=True, - ) + ).to(source_device) spec = torch.sqrt(spec.real.pow(2) + spec.imag.pow(2) + 1e-6) diff --git a/rvc/train/preprocess/preprocess.py b/rvc/train/preprocess/preprocess.py index a959a078eced0dec860d47c98bfe66577aa79330..15b3a151ccda45782238d91cf256c876129b6f4f 100644 --- a/rvc/train/preprocess/preprocess.py +++ b/rvc/train/preprocess/preprocess.py @@ -1,38 +1,36 @@ import os import sys import time -import librosa -import numpy as np from scipy import signal from scipy.io import wavfile -from multiprocessing import cpu_count, Pool -from pydub import AudioSegment +import numpy as np +import 
concurrent.futures +from tqdm import tqdm +import json +from distutils.util import strtobool +import librosa +import multiprocessing now_directory = os.getcwd() sys.path.append(now_directory) from rvc.lib.utils import load_audio -from rvc.train.slicer import Slicer +from rvc.train.preprocess.slicer import Slicer + +# Remove colab logs +import logging -# Parse command line arguments -experiment_directory = str(sys.argv[1]) -input_root = str(sys.argv[2]) -sample_rate = int(sys.argv[3]) -percentage = float(sys.argv[4]) -num_processes = int(sys.argv[5]) if len(sys.argv) > 5 else cpu_count() +logging.getLogger("numba.core.byteflow").setLevel(logging.WARNING) +logging.getLogger("numba.core.ssa").setLevel(logging.WARNING) +logging.getLogger("numba.core.interpreter").setLevel(logging.WARNING) -# Define constants +# Constants OVERLAP = 0.3 -TAIL = percentage + OVERLAP MAX_AMPLITUDE = 0.9 ALPHA = 0.75 HIGH_PASS_CUTOFF = 48 SAMPLE_RATE_16K = 16000 -# Define directory paths -GT_WAVS_DIR = os.path.join(experiment_directory, "sliced_audios") -WAVS16K_DIR = os.path.join(experiment_directory, "sliced_audios_16k") - class PreProcess: def __init__(self, sr: int, exp_dir: str, per: float): @@ -50,103 +48,176 @@ class PreProcess: ) self.per = per self.exp_dir = exp_dir + self.device = "cpu" + self.gt_wavs_dir = os.path.join(exp_dir, "sliced_audios") + self.wavs16k_dir = os.path.join(exp_dir, "sliced_audios_16k") + os.makedirs(self.gt_wavs_dir, exist_ok=True) + os.makedirs(self.wavs16k_dir, exist_ok=True) def _normalize_audio(self, audio: np.ndarray): - """Normalizes the audio to the desired amplitude.""" tmp_max = np.abs(audio).max() if tmp_max > 2.5: - return None # Indicate audio should be filtered out + return None return (audio / tmp_max * (MAX_AMPLITUDE * ALPHA)) + (1 - ALPHA) * audio - def _write_audio(self, audio: np.ndarray, filename: str, sr: int): - """Writes the audio to a WAV file.""" - wavfile.write(filename, sr, audio.astype(np.float32)) - - def 
process_audio_segment(self, audio_segment: np.ndarray, idx0: int, idx1: int): - """Processes a single audio segment.""" - normalized_audio = self._normalize_audio(audio_segment) + def process_audio_segment( + self, + audio_segment: np.ndarray, + idx0: int, + idx1: int, + process_effects: bool, + ): + normalized_audio = ( + self._normalize_audio(audio_segment) if process_effects else audio_segment + ) if normalized_audio is None: print(f"{idx0}-{idx1}-filtered") return - - # Write original sample rate audio - gt_wav_path = os.path.join(GT_WAVS_DIR, f"{idx0}_{idx1}.wav") - self._write_audio(normalized_audio, gt_wav_path, self.sr) - - # Resample and write 16kHz audio + wavfile.write( + os.path.join(self.gt_wavs_dir, f"{idx0}_{idx1}.wav"), + self.sr, + normalized_audio.astype(np.float32), + ) audio_16k = librosa.resample( normalized_audio, orig_sr=self.sr, target_sr=SAMPLE_RATE_16K ) - wav_16k_path = os.path.join(WAVS16K_DIR, f"{idx0}_{idx1}.wav") - self._write_audio(audio_16k, wav_16k_path, SAMPLE_RATE_16K) + wavfile.write( + os.path.join(self.wavs16k_dir, f"{idx0}_{idx1}.wav"), + SAMPLE_RATE_16K, + audio_16k.astype(np.float32), + ) - def process_audio(self, path: str, idx0: int): - """Processes a single audio file.""" + def process_audio( + self, + path: str, + idx0: int, + cut_preprocess: bool, + process_effects: bool, + ): + audio_length = 0 try: audio = load_audio(path, self.sr) - audio = signal.lfilter(self.b_high, self.a_high, audio) - + audio_length = librosa.get_duration(y=audio, sr=self.sr) + if process_effects: + audio = signal.lfilter(self.b_high, self.a_high, audio) idx1 = 0 - for audio_segment in self.slicer.slice(audio): - i = 0 - while True: - start = int(self.sr * (self.per - OVERLAP) * i) - i += 1 - if len(audio_segment[start:]) > TAIL * self.sr: - tmp_audio = audio_segment[ - start : start + int(self.per * self.sr) - ] - self.process_audio_segment(tmp_audio, idx0, idx1) - idx1 += 1 - else: - tmp_audio = audio_segment[start:] - 
self.process_audio_segment(tmp_audio, idx0, idx1) - idx1 += 1 - break + if cut_preprocess: + for audio_segment in self.slicer.slice(audio): + i = 0 + while True: + start = int(self.sr * (self.per - OVERLAP) * i) + i += 1 + if len(audio_segment[start:]) > (self.per + OVERLAP) * self.sr: + tmp_audio = audio_segment[ + start : start + int(self.per * self.sr) + ] + self.process_audio_segment( + tmp_audio, idx0, idx1, process_effects + ) + idx1 += 1 + else: + tmp_audio = audio_segment[start:] + self.process_audio_segment( + tmp_audio, idx0, idx1, process_effects + ) + idx1 += 1 + break + else: + self.process_audio_segment(audio, idx0, idx1, process_effects) except Exception as error: - print(f"An error occurred on {path} path: {error}") - - def process_audio_file(self, file_path_idx): - file_path, idx0 = file_path_idx - # Convert the audio file to WAV format using pydub if necessary - ext = os.path.splitext(file_path)[1].lower() - if ext not in [".wav"]: - audio = AudioSegment.from_file(file_path) - file_path = os.path.join("/tmp", f"{idx0}.wav") - audio.export(file_path, format="wav") - self.process_audio(file_path, idx0) - - def process_audio_multiprocessing_input_directory( - self, input_root: str, num_processes: int - ): - # Get list of files - files = [ - (os.path.join(input_root, f), idx) - for idx, f in enumerate(os.listdir(input_root)) - if f.lower().endswith((".wav", ".mp3", ".flac", ".ogg")) - ] + print(f"Error processing audio: {error}") + return audio_length + + +def format_duration(seconds): + hours = int(seconds // 3600) + minutes = int((seconds % 3600) // 60) + seconds = int(seconds % 60) + return f"{hours:02}:{minutes:02}:{seconds:02}" - # Create the directories if they don't exist - os.makedirs(GT_WAVS_DIR, exist_ok=True) - os.makedirs(WAVS16K_DIR, exist_ok=True) - # Use multiprocessing to process files - with Pool(processes=num_processes) as pool: - pool.map(self.process_audio_file, files) +def save_dataset_duration(file_path, dataset_duration): + try: 
+ with open(file_path, "r") as f: + data = json.load(f) + except FileNotFoundError: + data = {} + + formatted_duration = format_duration(dataset_duration) + new_data = { + "total_dataset_duration": formatted_duration, + "total_seconds": dataset_duration, + } + data.update(new_data) + + with open(file_path, "w") as f: + json.dump(data, f, indent=4) + + +def process_audio_wrapper(args): + pp, file, cut_preprocess, process_effects = args + file_path, idx0 = file + return pp.process_audio(file_path, idx0, cut_preprocess, process_effects) def preprocess_training_set( - input_root: str, sr: int, num_processes: int, exp_dir: str, per: float + input_root: str, + sr: int, + num_processes: int, + exp_dir: str, + per: float, + cut_preprocess: bool, + process_effects: bool, ): start_time = time.time() pp = PreProcess(sr, exp_dir, per) - print(f"Starting preprocess with {num_processes} cores...") - pp.process_audio_multiprocessing_input_directory(input_root, num_processes) + print(f"Starting preprocess with {num_processes} processes...") + + files = [ + (os.path.join(input_root, f), idx) + for idx, f in enumerate(os.listdir(input_root)) + if f.lower().endswith((".wav", ".mp3", ".flac", ".ogg")) + ] + # print(f"Number of files: {len(files)}") + with concurrent.futures.ThreadPoolExecutor(max_workers=num_processes) as executor: + audio_length = list( + tqdm( + executor.map( + process_audio_wrapper, + [(pp, file, cut_preprocess, process_effects) for file in files], + ), + total=len(files), + ) + ) + audio_length = sum(audio_length) + save_dataset_duration( + os.path.join(exp_dir, "model_info.json"), dataset_duration=audio_length + ) elapsed_time = time.time() - start_time - print(f"Preprocess completed in {elapsed_time:.2f} seconds.") + print( + f"Preprocess completed in {elapsed_time:.2f} seconds on {format_duration(audio_length)} seconds of audio." 
+ ) if __name__ == "__main__": + experiment_directory = str(sys.argv[1]) + input_root = str(sys.argv[2]) + sample_rate = int(sys.argv[3]) + percentage = float(sys.argv[4]) + num_processes = sys.argv[5] + if num_processes.lower() == "none": + num_processes = multiprocessing.cpu_count() + else: + num_processes = int(num_processes) + cut_preprocess = strtobool(sys.argv[6]) + process_effects = strtobool(sys.argv[7]) + preprocess_training_set( - input_root, sample_rate, num_processes, experiment_directory, percentage - ) \ No newline at end of file + input_root, + sample_rate, + num_processes, + experiment_directory, + percentage, + cut_preprocess, + process_effects, + ) diff --git a/rvc/train/slicer.py b/rvc/train/preprocess/slicer.py similarity index 100% rename from rvc/train/slicer.py rename to rvc/train/preprocess/slicer.py diff --git a/rvc/train/process/extract_index.py b/rvc/train/process/extract_index.py index 7e80796cf2e63e689bdaedf04a00323b4ac4e5c8..bdbd779fbe0c2b639d1c4fc521f90ca661deb1f3 100644 --- a/rvc/train/process/extract_index.py +++ b/rvc/train/process/extract_index.py @@ -8,72 +8,78 @@ from multiprocessing import cpu_count # Parse command line arguments exp_dir = str(sys.argv[1]) version = str(sys.argv[2]) +index_algorithm = str(sys.argv[3]) try: feature_dir = os.path.join(exp_dir, f"{version}_extracted") model_name = os.path.basename(exp_dir) - npys = [] - listdir_res = sorted(os.listdir(feature_dir)) - - for name in listdir_res: - file_path = os.path.join(feature_dir, name) - phone = np.load(file_path) - npys.append(phone) - - big_npy = np.concatenate(npys, axis=0) + index_filename_added = f"added_{model_name}_{version}.index" + index_filepath_added = os.path.join(exp_dir, index_filename_added) - big_npy_idx = np.arange(big_npy.shape[0]) - np.random.shuffle(big_npy_idx) - big_npy = big_npy[big_npy_idx] + index_filename_trained = f"trained_{model_name}_{version}.index" + index_filepath_trained = os.path.join(exp_dir, index_filename_trained) - if 
big_npy.shape[0] > 2e5: - big_npy = ( - MiniBatchKMeans( - n_clusters=10000, - verbose=True, - batch_size=256 * cpu_count(), - compute_labels=False, - init="random", + if os.path.exists(index_filepath_trained) and os.path.exists(index_filepath_added): + pass + else: + npys = [] + listdir_res = sorted(os.listdir(feature_dir)) + + for name in listdir_res: + file_path = os.path.join(feature_dir, name) + phone = np.load(file_path) + npys.append(phone) + + big_npy = np.concatenate(npys, axis=0) + + big_npy_idx = np.arange(big_npy.shape[0]) + np.random.shuffle(big_npy_idx) + big_npy = big_npy[big_npy_idx] + + if big_npy.shape[0] > 2e5 and ( + index_algorithm == "Auto" or index_algorithm == "KMeans" + ): + big_npy = ( + MiniBatchKMeans( + n_clusters=10000, + verbose=True, + batch_size=256 * cpu_count(), + compute_labels=False, + init="random", + ) + .fit(big_npy) + .cluster_centers_ ) - .fit(big_npy) - .cluster_centers_ - ) - np.save(os.path.join(exp_dir, "total_fea.npy"), big_npy) + np.save(os.path.join(exp_dir, "total_fea.npy"), big_npy) - n_ivf = min(int(16 * np.sqrt(big_npy.shape[0])), big_npy.shape[0] // 39) - - # index_trained - index_trained = faiss.index_factory( - 256 if version == "v1" else 768, f"IVF{n_ivf},Flat" - ) - index_ivf_trained = faiss.extract_index_ivf(index_trained) - index_ivf_trained.nprobe = 1 - index_trained.train(big_npy) + n_ivf = min(int(16 * np.sqrt(big_npy.shape[0])), big_npy.shape[0] // 39) - index_filename_trained = f"trained_{model_name}_{version}.index" - index_filepath_trained = os.path.join(exp_dir, index_filename_trained) - - faiss.write_index(index_trained, index_filepath_trained) + # index_trained + index_trained = faiss.index_factory( + 256 if version == "v1" else 768, f"IVF{n_ivf},Flat" + ) + index_ivf_trained = faiss.extract_index_ivf(index_trained) + index_ivf_trained.nprobe = 1 + index_trained.train(big_npy) - # index_added - index_added = faiss.index_factory( - 256 if version == "v1" else 768, f"IVF{n_ivf},Flat" - ) - 
index_ivf_added = faiss.extract_index_ivf(index_added) - index_ivf_added.nprobe = 1 - index_added.train(big_npy) + faiss.write_index(index_trained, index_filepath_trained) - index_filename_added = f"added_{model_name}_{version}.index" - index_filepath_added = os.path.join(exp_dir, index_filename_added) + # index_added + index_added = faiss.index_factory( + 256 if version == "v1" else 768, f"IVF{n_ivf},Flat" + ) + index_ivf_added = faiss.extract_index_ivf(index_added) + index_ivf_added.nprobe = 1 + index_added.train(big_npy) - batch_size_add = 8192 - for i in range(0, big_npy.shape[0], batch_size_add): - index_added.add(big_npy[i : i + batch_size_add]) + batch_size_add = 8192 + for i in range(0, big_npy.shape[0], batch_size_add): + index_added.add(big_npy[i : i + batch_size_add]) - faiss.write_index(index_added, index_filepath_added) - print(f"Saved index file '{index_filepath_added}'") + faiss.write_index(index_added, index_filepath_added) + print(f"Saved index file '{index_filepath_added}'") except Exception as error: print(f"An error occurred extracting the index: {error}") diff --git a/rvc/train/process/extract_model.py b/rvc/train/process/extract_model.py index 84824a0261dde1245f4e041e17e2cebdadcf89b7..d4abde9fdba8fad1ab5f97410e96fbe993160777 100644 --- a/rvc/train/process/extract_model.py +++ b/rvc/train/process/extract_model.py @@ -1,8 +1,12 @@ -import os +import os, sys import torch import hashlib import datetime from collections import OrderedDict +import json + +now_dir = os.getcwd() +sys.path.append(now_dir) def replace_keys_in_dict(d, old_key_part, new_key_part): @@ -18,18 +22,45 @@ def replace_keys_in_dict(d, old_key_part, new_key_part): return updated_dict -def extract_model(ckpt, sr, pitch_guidance, name, model_dir, epoch, step, version, hps): +def extract_model( + ckpt, + sr, + pitch_guidance, + name, + model_dir, + epoch, + step, + version, + hps, + overtrain_info, +): try: print(f"Saved model '{model_dir}' (epoch {epoch} and step {step})") 
model_dir_path = os.path.dirname(model_dir) os.makedirs(model_dir_path, exist_ok=True) - pth_file = f"{name}_{epoch}e_{step}s.pth" + if "best_epoch" in model_dir: + pth_file = f"{name}_{epoch}e_{step}s_best_epoch.pth" + else: + pth_file = f"{name}_{epoch}e_{step}s.pth" + pth_file_old_version_path = os.path.join( model_dir_path, f"{pth_file}_old_version.pth" ) + model_dir_path = os.path.dirname(model_dir) + if os.path.exists(os.path.join(model_dir_path, "model_info.json")): + with open(os.path.join(model_dir_path, "model_info.json"), "r") as f: + data = json.load(f) + dataset_lenght = data.get("total_dataset_duration", None) + else: + dataset_lenght = None + + with open(os.path.join(now_dir, "assets", "config.json"), "r") as f: + data = json.load(f) + model_author = data.get("model_author", None) + opt = OrderedDict( weight={ key: value.half() for key, value in ckpt.items() if "enc_q" not in key @@ -66,6 +97,10 @@ def extract_model(ckpt, sr, pitch_guidance, name, model_dir, epoch, step, versio hash_input = f"{str(ckpt)} {epoch} {step} {datetime.datetime.now().isoformat()}" model_hash = hashlib.sha256(hash_input.encode()).hexdigest() opt["model_hash"] = model_hash + opt["overtrain_info"] = overtrain_info + opt["dataset_lenght"] = dataset_lenght + opt["model_name"] = name + opt["author"] = model_author torch.save(opt, os.path.join(model_dir_path, pth_file)) diff --git a/rvc/train/process/model_blender.py b/rvc/train/process/model_blender.py index 9d72f48516a5656a42e12c2c76f3eadebbcbb378..e08105d5f597269f8f1e44f4099d20c4d33bd1df 100644 --- a/rvc/train/process/model_blender.py +++ b/rvc/train/process/model_blender.py @@ -26,6 +26,7 @@ def model_blender(name, path1, path2, ratio): cfg = ckpt1["config"] cfg_f0 = ckpt1["f0"] cfg_version = ckpt1["version"] + cfg_sr = ckpt1["sr"] if "model" in ckpt1: ckpt1 = extract(ckpt1) @@ -54,14 +55,14 @@ def model_blender(name, path1, path2, ratio): ).half() opt["config"] = cfg - opt["sr"] = message + opt["sr"] = cfg_sr opt["f0"] = 
cfg_f0 opt["version"] = cfg_version opt["info"] = message - torch.save(opt, os.path.join("logs", "%s.pth" % name)) + torch.save(opt, os.path.join("logs", f"{name}.pth")) print(message) - return message, os.path.join("logs", "%s.pth" % name) + return message, os.path.join("logs", f"{name}.pth") except Exception as error: print(f"An error occurred blending the models: {error}") return error diff --git a/rvc/train/process/model_information.py b/rvc/train/process/model_information.py index be74269e4875693e27fca88b025158a9f9fcbe5a..d5e6051b6a3f4ac53644f16508d72eafedfad0f1 100644 --- a/rvc/train/process/model_information.py +++ b/rvc/train/process/model_information.py @@ -17,24 +17,32 @@ def model_information(path): print(f"Loaded model from {path}") + model_name = model_data.get("model_name", "None") epochs = model_data.get("epoch", "None") steps = model_data.get("step", "None") sr = model_data.get("sr", "None") f0 = model_data.get("f0", "None") + dataset_lenght = model_data.get("dataset_lenght", "None") version = model_data.get("version", "None") creation_date = model_data.get("creation_date", "None") model_hash = model_data.get("model_hash", None) + overtrain_info = model_data.get("overtrain_info", "None") + model_author = model_data.get("author", "None") pitch_guidance = "True" if f0 == 1 else "False" creation_date_str = prettify_date(creation_date) if creation_date else "None" return ( + f"Model Name: {model_name}\n" + f"Model Creator: {model_author}\n" f"Epochs: {epochs}\n" f"Steps: {steps}\n" - f"RVC Version: {version}\n" + f"Model Architecture: {version}\n" f"Sampling Rate: {sr}\n" f"Pitch Guidance: {pitch_guidance}\n" + f"Dataset Length: {dataset_lenght}\n" f"Creation Date: {creation_date_str}\n" - f"Hash (ID): {model_hash}" + f"Hash (ID): {model_hash}\n" + f"Overtrain Info: {overtrain_info}" ) diff --git a/rvc/train/train.py b/rvc/train/train.py index 9ba608a9d1d896350975fb087fc2efa6af5faad0..d6ec9434bc873ae948f9d67dad2cf5b55b486a54 100644 --- 
a/rvc/train/train.py +++ b/rvc/train/train.py @@ -76,6 +76,7 @@ sync_graph = strtobool(sys.argv[16]) current_dir = os.getcwd() experiment_dir = os.path.join(current_dir, "logs", model_name) config_save_path = os.path.join(experiment_dir, "config.json") +dataset_path = os.path.join(experiment_dir, "sliced_audios") with open(config_save_path, "r") as f: config = json.load(f) @@ -89,10 +90,16 @@ torch.backends.cudnn.deterministic = False torch.backends.cudnn.benchmark = False global_step = 0 -lowest_value = {"step": 0, "value": float("inf"), "epoch": 0} last_loss_gen_all = 0 +overtrain_save_epoch = 0 +loss_gen_history = [] +smoothed_loss_gen_history = [] +loss_disc_history = [] +smoothed_loss_disc_history = [] +lowest_value = {"step": 0, "value": float("inf"), "epoch": 0} +training_file_path = os.path.join(experiment_dir, "training_data.json") +overtrain_info = None -# Disable logging import logging logging.getLogger("torch").setLevel(logging.ERROR) @@ -123,6 +130,7 @@ def main(): """ Main function to start the training process. """ + global training_file_path, last_loss_gen_all, smoothed_loss_gen_history, loss_gen_history, loss_disc_history, smoothed_loss_disc_history, overtrain_save_epoch os.environ["MASTER_ADDR"] = "localhost" os.environ["MASTER_PORT"] = str(randint(20000, 55555)) @@ -131,8 +139,14 @@ def main(): Starts the training process with multi-GPU support. 
""" children = [] - pid_file_path = os.path.join(experiment_dir, "train_pid.txt") - with open(pid_file_path, "w") as pid_file: + pid_data = {"process_pids": []} + with open(config_save_path, "r") as pid_file: + try: + existing_data = json.load(pid_file) + pid_data.update(existing_data) + except json.JSONDecodeError: + pass + with open(config_save_path, "w") as pid_file: for i in range(n_gpus): subproc = mp.Process( target=run, @@ -150,11 +164,46 @@ def main(): ) children.append(subproc) subproc.start() - pid_file.write(str(subproc.pid) + "\n") + pid_data["process_pids"].append(subproc.pid) + json.dump(pid_data, pid_file, indent=4) for i in range(n_gpus): children[i].join() + def load_from_json(file_path): + """ + Load data from a JSON file. + + Args: + file_path (str): The path to the JSON file. + """ + if os.path.exists(file_path): + with open(file_path, "r") as f: + data = json.load(f) + return ( + data.get("loss_disc_history", []), + data.get("smoothed_loss_disc_history", []), + data.get("loss_gen_history", []), + data.get("smoothed_loss_gen_history", []), + ) + return [], [], [], [] + + def continue_overtrain_detector(training_file_path): + """ + Continues the overtrain detector by loading the training history from a JSON file. + + Args: + training_file_path (str): The file path of the JSON file containing the training history. 
+ """ + if overtraining_detector: + if os.path.exists(training_file_path): + ( + loss_disc_history, + smoothed_loss_disc_history, + loss_gen_history, + smoothed_loss_gen_history, + ) = load_from_json(training_file_path) + n_gpus = torch.cuda.device_count() if torch.cuda.is_available() == False and torch.backends.mps.is_available() == True: @@ -181,12 +230,12 @@ def main(): now_dir, "rvc", "configs", "v1", str(sample_rate) + ".json" ) - pattern = rf"{os.path.basename(model_name)}_1e_(\d+)s\.pth" + pattern = rf"{os.path.basename(model_name)}_(\d+)e_(\d+)s\.pth" for filename in os.listdir(experiment_dir): match = re.match(pattern, filename) if match: - steps = int(match.group(1)) + steps = int(match.group(2)) def edit_config(config_file): """ @@ -239,10 +288,12 @@ def main(): print("Successfully synchronized graphs!") custom_total_epoch = total_epoch custom_save_every_weights = save_every_weights + continue_overtrain_detector(training_file_path) start() else: custom_total_epoch = total_epoch custom_save_every_weights = save_every_weights + continue_overtrain_detector(training_file_path) start() @@ -265,6 +316,7 @@ def run( n_gpus (int): Total number of GPUs. 
""" global global_step + if rank == 0: writer = SummaryWriter(log_dir=experiment_dir) writer_eval = SummaryWriter(log_dir=os.path.join(experiment_dir, "eval")) @@ -276,6 +328,14 @@ def run( if torch.cuda.is_available(): torch.cuda.set_device(rank) + # Zluda + if torch.cuda.is_available() and torch.cuda.get_device_name().endswith("[ZLUDA]"): + print("Disabling CUDNN for traning with Zluda") + torch.backends.cudnn.enabled = False + torch.backends.cuda.enable_flash_sdp(False) + torch.backends.cuda.enable_math_sdp(True) + torch.backends.cuda.enable_mem_efficient_sdp(False) + # Create datasets and dataloaders if pitch_guidance == True: train_dataset = TextAudioLoaderMultiNSFsid(config.data) @@ -356,6 +416,7 @@ def run( _, _, _, epoch_str = load_checkpoint( latest_checkpoint_path(experiment_dir, "G_*.pth"), net_g, optim_g ) + epoch_str += 1 global_step = (epoch_str - 1) * len(train_loader) except: @@ -395,6 +456,9 @@ def run( optim_d, gamma=config.train.lr_decay, last_epoch=epoch_str - 2 ) + optim_d.step() + optim_g.step() + scaler = GradScaler(enabled=config.train.fp16_run) cache = [] @@ -427,7 +491,6 @@ def run( custom_save_every_weights, custom_total_epoch, ) - scheduler_g.step() scheduler_d.step() @@ -459,11 +522,13 @@ def train_and_evaluate( writers (list): List of TensorBoard writers [writer, writer_eval]. cache (list): List to cache data in GPU memory. 
""" - global global_step, last_loss_gen_all, lowest_value + global global_step, lowest_value, loss_disc, consecutive_increases_gen, consecutive_increases_disc if epoch == 1: lowest_value = {"step": 0, "value": float("inf"), "epoch": 0} last_loss_gen_all = 0.0 + consecutive_increases_gen = 0 + consecutive_increases_disc = 0 net_g, net_d = nets optim_g, optim_d = optims @@ -477,7 +542,7 @@ def train_and_evaluate( net_d.train() # Data caching - if cache_data_in_gpu == True: + if cache_data_in_gpu: data_iterator = cache if cache == []: for batch_idx, info in enumerate(train_loader): @@ -578,6 +643,7 @@ def train_and_evaluate( spec = spec.cuda(rank, non_blocking=True) spec_lengths = spec_lengths.cuda(rank, non_blocking=True) wave = wave.cuda(rank, non_blocking=True) + wave_lengths = wave_lengths.cuda(rank, non_blocking=True) # Forward pass with autocast(enabled=config.train.fp16_run): @@ -608,7 +674,10 @@ def train_and_evaluate( config.data.mel_fmax, ) y_mel = commons.slice_segments( - mel, ids_slice, config.train.segment_size // config.data.hop_length + mel, + ids_slice, + config.train.segment_size // config.data.hop_length, + dim=3, ) with autocast(enabled=False): y_hat_mel = mel_spectrogram_torch( @@ -624,7 +693,10 @@ def train_and_evaluate( if config.train.fp16_run == True: y_hat_mel = y_hat_mel.half() wave = commons.slice_segments( - wave, ids_slice * config.data.hop_length, config.train.segment_size + wave, + ids_slice * config.data.hop_length, + config.train.segment_size, + dim=3, ) y_d_hat_r, y_d_hat_g, _, _ = net_d(wave, y_hat.detach()) @@ -673,7 +745,6 @@ def train_and_evaluate( if rank == 0: if global_step % config.train.log_interval == 0: lr = optim_g.param_groups[0]["lr"] - # print("Epoch: {} [{:.0f}%]".format(epoch, 100.0 * batch_idx / len(train_loader))) if loss_mel > 75: loss_mel = 75 @@ -694,21 +765,14 @@ def train_and_evaluate( "loss/g/kl": loss_kl, } ) - scalar_dict.update( - {"loss/g/{}".format(i): v for i, v in enumerate(losses_gen)} + 
{f"loss/g/{i}": v for i, v in enumerate(losses_gen)} ) scalar_dict.update( - { - "loss/d_r/{}".format(i): v - for i, v in enumerate(losses_disc_r) - } + {f"loss/d_r/{i}": v for i, v in enumerate(losses_disc_r)} ) scalar_dict.update( - { - "loss/d_g/{}".format(i): v - for i, v in enumerate(losses_disc_g) - } + {f"loss/d_g/{i}": v for i, v in enumerate(losses_disc_g)} ) image_dict = { "slice/mel_org": plot_spectrogram_to_numpy( @@ -748,12 +812,13 @@ def train_and_evaluate( epoch, os.path.join(experiment_dir, "D_" + checkpoint_suffix), ) - if rank == 0 and custom_save_every_weights == True: if hasattr(net_g, "module"): ckpt = net_g.module.state_dict() else: ckpt = net_g.state_dict() + if overtraining_detector != True: + overtrain_info = None extract_model( ckpt=ckpt, sr=sample_rate, @@ -767,26 +832,131 @@ def train_and_evaluate( step=global_step, version=version, hps=hps, + overtrain_info=overtrain_info, ) - # Overtraining detection and best model saving - if overtraining_detector == True: - if epoch >= (lowest_value["epoch"] + overtraining_threshold): - print( - "Stopping training due to possible overtraining. Lowest generator loss: {} at epoch {}, step {}".format( - lowest_value["value"], lowest_value["epoch"], lowest_value["step"] - ) + def check_overtraining(smoothed_loss_history, threshold, epsilon=0.004): + """ + Checks for overtraining based on the smoothed loss history. + + Args: + smoothed_loss_history (list): List of smoothed losses for each epoch. + threshold (int): Number of consecutive epochs with insignificant changes or increases to consider overtraining. + epsilon (float): The maximum change considered insignificant. 
+ """ + if len(smoothed_loss_history) < threshold + 1: + return False + + for i in range(-threshold, -1): + if smoothed_loss_history[i + 1] > smoothed_loss_history[i]: + return True + if abs(smoothed_loss_history[i + 1] - smoothed_loss_history[i]) >= epsilon: + return False + + return True + + def update_exponential_moving_average( + smoothed_loss_history, new_value, smoothing=0.987 + ): + """ + Updates the exponential moving average with a new value. + + Args: + smoothed_loss_history (list): List of smoothed values. + new_value (float): New value to be added. + smoothing (float): Smoothing factor. + """ + if not smoothed_loss_history: + smoothed_value = new_value + else: + smoothed_value = ( + smoothing * smoothed_loss_history[-1] + (1 - smoothing) * new_value ) - os._exit(2333333) + smoothed_loss_history.append(smoothed_value) + return smoothed_value + + def save_to_json( + file_path, + loss_disc_history, + smoothed_loss_disc_history, + loss_gen_history, + smoothed_loss_gen_history, + ): + """ + Save the training history to a JSON file. 
+ """ + data = { + "loss_disc_history": loss_disc_history, + "smoothed_loss_disc_history": smoothed_loss_disc_history, + "loss_gen_history": loss_gen_history, + "smoothed_loss_gen_history": smoothed_loss_gen_history, + } + with open(file_path, "w") as f: + json.dump(data, f) + + if overtraining_detector and rank == 0 and epoch > 1: + # Add the current loss to the history + current_loss_disc = float(loss_disc) + loss_disc_history.append(current_loss_disc) + + # Update smoothed loss history with loss_disc + smoothed_value_disc = update_exponential_moving_average( + smoothed_loss_disc_history, current_loss_disc + ) - best_epoch = lowest_value["epoch"] + overtraining_threshold - epoch + # Check overtraining with smoothed loss_disc + is_overtraining_disc = check_overtraining( + smoothed_loss_disc_history, overtraining_threshold * 2 + ) + if is_overtraining_disc: + consecutive_increases_disc += 1 + else: + consecutive_increases_disc = 0 + # Add the current loss_gen to the history + current_loss_gen = float(lowest_value["value"]) + loss_gen_history.append(current_loss_gen) + + # Update the smoothed loss_gen history + smoothed_value_gen = update_exponential_moving_average( + smoothed_loss_gen_history, current_loss_gen + ) + + # Check for overtraining with the smoothed loss_gen + is_overtraining_gen = check_overtraining( + smoothed_loss_gen_history, overtraining_threshold, 0.01 + ) + if is_overtraining_gen: + consecutive_increases_gen += 1 + else: + consecutive_increases_gen = 0 + + overtrain_info = f"Smoothed loss_g {smoothed_value_gen:.3f} and loss_d {smoothed_value_disc:.3f}" + # Save the data in the JSON file if the epoch is divisible by save_every_epoch + if epoch % save_every_epoch == 0: + save_to_json( + training_file_path, + loss_disc_history, + smoothed_loss_disc_history, + loss_gen_history, + smoothed_loss_gen_history, + ) - if best_epoch == overtraining_threshold: + if ( + is_overtraining_gen + and consecutive_increases_gen == overtraining_threshold + or 
is_overtraining_disc + and consecutive_increases_disc == (overtraining_threshold * 2) + ): + print( + f"Overtraining detected at epoch {epoch} with smoothed loss_g {smoothed_value_gen:.3f} and loss_d {smoothed_value_disc:.3f}" + ) + os._exit(2333333) + else: + print( + f"New best epoch {epoch} with smoothed loss_g {smoothed_value_gen:.3f} and loss_d {smoothed_value_disc:.3f}" + ) old_model_files = glob.glob( - os.path.join( - experiment_dir, - "{}_{}e_{}s_best_epoch.pth".format(model_name, "*", "*"), - ) + os.path.join(experiment_dir, f"{model_name}_*e_*s_best_epoch.pth") ) for file in old_model_files: os.remove(file) @@ -795,7 +965,8 @@ def train_and_evaluate( ckpt = net_g.module.state_dict() else: ckpt = net_g.state_dict() - + if overtraining_detector != True: + overtrain_info = None extract_model( ckpt=ckpt, sr=sample_rate, @@ -809,18 +980,21 @@ def train_and_evaluate( step=global_step, version=version, hps=hps, + overtrain_info=overtrain_info, ) # Print training progress if rank == 0: - lowest_value_rounded = float(lowest_value["value"]) # Convert to float - lowest_value_rounded = round( - lowest_value_rounded, 3 - ) # Round to 3 decimal place + lowest_value_rounded = float(lowest_value["value"]) + lowest_value_rounded = round(lowest_value_rounded, 3) if epoch > 1 and overtraining_detector == True: + remaining_epochs_gen = overtraining_threshold - consecutive_increases_gen + remaining_epochs_disc = ( + overtraining_threshold * 2 + ) - consecutive_increases_disc print( - f"{model_name} | epoch={epoch} | step={global_step} | {epoch_recorder.record()} | lowest_value={lowest_value_rounded} (epoch {lowest_value['epoch']} and step {lowest_value['step']}) | Number of epochs remaining for overtraining: {lowest_value['epoch'] + overtraining_threshold - epoch}" + f"{model_name} | epoch={epoch} | step={global_step} | {epoch_recorder.record()} | lowest_value={lowest_value_rounded} (epoch {lowest_value['epoch']} and step {lowest_value['step']}) | Number of epochs remaining 
for overtraining: g/total: {remaining_epochs_gen} d/total: {remaining_epochs_disc} | smoothed_loss_gen={smoothed_value_gen:.3f} | smoothed_loss_disc={smoothed_value_disc:.3f}" ) elif epoch > 1 and overtraining_detector == False: print( @@ -834,10 +1008,8 @@ def train_and_evaluate( # Save the final model if epoch >= custom_total_epoch and rank == 0: - lowest_value_rounded = float(lowest_value["value"]) # Convert to float - lowest_value_rounded = round( - lowest_value_rounded, 3 - ) # Round to 3 decimal place + lowest_value_rounded = float(lowest_value["value"]) + lowest_value_rounded = round(lowest_value_rounded, 3) print( f"Training has been successfully completed with {epoch} epoch, {global_step} steps and {round(loss_gen_all.item(), 3)} loss gen." ) @@ -845,32 +1017,41 @@ def train_and_evaluate( f"Lowest generator loss: {lowest_value_rounded} at epoch {lowest_value['epoch']}, step {lowest_value['step']}" ) - pid_file_path = os.path.join(experiment_dir, "train_pid.txt") - os.remove(pid_file_path) + pid_file_path = os.path.join(experiment_dir, "config.json") + with open(pid_file_path, "r") as pid_file: + pid_data = json.load(pid_file) + with open(pid_file_path, "w") as pid_file: + pid_data.pop("process_pids", None) + json.dump(pid_data, pid_file, indent=4) - if hasattr(net_g, "module"): - ckpt = net_g.module.state_dict() - else: - ckpt = net_g.state_dict() - - extract_model( - ckpt=ckpt, - sr=sample_rate, - pitch_guidance=pitch_guidance == True, - name=model_name, - model_dir=os.path.join( - experiment_dir, - f"{model_name}_{epoch}e_{global_step}s.pth", - ), - epoch=epoch, - step=global_step, - version=version, - hps=hps, - ) + if not os.path.exists( + os.path.join(experiment_dir, f"{model_name}_{epoch}e_{global_step}s.pth") + ): + if hasattr(net_g, "module"): + ckpt = net_g.module.state_dict() + else: + ckpt = net_g.state_dict() + if overtraining_detector != True: + overtrain_info = None + extract_model( + ckpt=ckpt, + sr=sample_rate, + 
pitch_guidance=pitch_guidance == True, + name=model_name, + model_dir=os.path.join( + experiment_dir, + f"{model_name}_{epoch}e_{global_step}s.pth", + ), + epoch=epoch, + step=global_step, + version=version, + hps=hps, + overtrain_info=overtrain_info, + ) sleep(1) os._exit(2333333) if __name__ == "__main__": torch.multiprocessing.set_start_method("spawn") - main() \ No newline at end of file + main() diff --git a/rvc/train/utils.py b/rvc/train/utils.py index 1c3e2a5f96368db0a77e470978e4469b12b3c052..cd4051630a5e8597156a67a42ff6ab9576504bdc 100644 --- a/rvc/train/utils.py +++ b/rvc/train/utils.py @@ -4,38 +4,36 @@ import torch import numpy as np from scipy.io.wavfile import read from collections import OrderedDict -import matplotlib.pylab as plt +import matplotlib.pyplot as plt MATPLOTLIB_FLAG = False def replace_keys_in_dict(d, old_key_part, new_key_part): """ - Replaces keys in a dictionary recursively. + Recursively replace parts of the keys in a dictionary. Args: d (dict or OrderedDict): The dictionary to update. old_key_part (str): The part of the key to replace. new_key_part (str): The new part of the key. """ - if isinstance(d, OrderedDict): - updated_dict = OrderedDict() - else: - updated_dict = {} + updated_dict = OrderedDict() if isinstance(d, OrderedDict) else {} for key, value in d.items(): - if isinstance(key, str): - new_key = key.replace(old_key_part, new_key_part) - else: - new_key = key - if isinstance(value, dict): - value = replace_keys_in_dict(value, old_key_part, new_key_part) - updated_dict[new_key] = value + new_key = ( + key.replace(old_key_part, new_key_part) if isinstance(key, str) else key + ) + updated_dict[new_key] = ( + replace_keys_in_dict(value, old_key_part, new_key_part) + if isinstance(value, dict) + else value + ) return updated_dict def load_checkpoint(checkpoint_path, model, optimizer=None, load_opt=1): """ - Loads a checkpoint from a file. + Load a checkpoint into a model and optionally the optimizer. 
Args: checkpoint_path (str): Path to the checkpoint file. @@ -43,64 +41,50 @@ def load_checkpoint(checkpoint_path, model, optimizer=None, load_opt=1): optimizer (torch.optim.Optimizer, optional): The optimizer to load the state from. Defaults to None. load_opt (int, optional): Whether to load the optimizer state. Defaults to 1. """ - assert os.path.isfile(checkpoint_path) - checkpoint_old_dict = torch.load(checkpoint_path, map_location="cpu") - checkpoint_new_version_path = os.path.join( - os.path.dirname(checkpoint_path), - f"{os.path.splitext(os.path.basename(checkpoint_path))[0]}_new_version.pth", - ) + assert os.path.isfile( + checkpoint_path + ), f"Checkpoint file not found: {checkpoint_path}" - torch.save( + checkpoint_dict = torch.load(checkpoint_path, map_location="cpu") + checkpoint_dict = replace_keys_in_dict( replace_keys_in_dict( - replace_keys_in_dict( - checkpoint_old_dict, ".weight_v", ".parametrizations.weight.original1" - ), - ".weight_g", - ".parametrizations.weight.original0", + checkpoint_dict, ".weight_v", ".parametrizations.weight.original1" ), - checkpoint_new_version_path, + ".weight_g", + ".parametrizations.weight.original0", ) - os.remove(checkpoint_path) - os.rename(checkpoint_new_version_path, checkpoint_path) + # Update model state_dict + model_state_dict = ( + model.module.state_dict() if hasattr(model, "module") else model.state_dict() + ) + new_state_dict = { + k: checkpoint_dict["model"].get(k, v) for k, v in model_state_dict.items() + } - checkpoint_dict = torch.load(checkpoint_path, map_location="cpu") - saved_state_dict = checkpoint_dict["model"] - if hasattr(model, "module"): - state_dict = model.module.state_dict() - else: - state_dict = model.state_dict() - new_state_dict = {} - for k, v in state_dict.items(): - try: - new_state_dict[k] = saved_state_dict[k] - if saved_state_dict[k].shape != state_dict[k].shape: - print( - "shape-%s-mismatch|need-%s|get-%s", - k, - state_dict[k].shape, - saved_state_dict[k].shape, - ) - raise 
KeyError - except: - print("%s is not in the checkpoint", k) - new_state_dict[k] = v + # Load state_dict into model if hasattr(model, "module"): model.module.load_state_dict(new_state_dict, strict=False) else: model.load_state_dict(new_state_dict, strict=False) - iteration = checkpoint_dict["iteration"] - learning_rate = checkpoint_dict["learning_rate"] - if optimizer is not None and load_opt == 1: - optimizer.load_state_dict(checkpoint_dict["optimizer"]) - print(f"Loaded checkpoint '{checkpoint_path}' (epoch {iteration})") - return model, optimizer, learning_rate, iteration + if optimizer and load_opt == 1: + optimizer.load_state_dict(checkpoint_dict.get("optimizer", {})) + + print( + f"Loaded checkpoint '{checkpoint_path}' (epoch {checkpoint_dict['iteration']})" + ) + return ( + model, + optimizer, + checkpoint_dict.get("learning_rate", 0), + checkpoint_dict["iteration"], + ) def save_checkpoint(model, optimizer, learning_rate, iteration, checkpoint_path): """ - Saves a checkpoint to a file. + Save the model and optimizer state to a checkpoint file. Args: model (torch.nn.Module): The model to save. @@ -109,37 +93,30 @@ def save_checkpoint(model, optimizer, learning_rate, iteration, checkpoint_path) iteration (int): The current iteration. checkpoint_path (str): The path to save the checkpoint to. 
""" - print(f"Saved model '{checkpoint_path}' (epoch {iteration})") - checkpoint_old_version_path = os.path.join( - os.path.dirname(checkpoint_path), - f"{os.path.splitext(os.path.basename(checkpoint_path))[0]}_old_version.pth", + state_dict = ( + model.module.state_dict() if hasattr(model, "module") else model.state_dict() ) - if hasattr(model, "module"): - state_dict = model.module.state_dict() - else: - state_dict = model.state_dict() - torch.save( - { - "model": state_dict, - "iteration": iteration, - "optimizer": optimizer.state_dict(), - "learning_rate": learning_rate, - }, - checkpoint_path, - ) - checkpoint = torch.load(checkpoint_path, map_location=torch.device("cpu")) - torch.save( + checkpoint_data = { + "model": state_dict, + "iteration": iteration, + "optimizer": optimizer.state_dict(), + "learning_rate": learning_rate, + } + torch.save(checkpoint_data, checkpoint_path) + + # Create a backwards-compatible checkpoint + old_version_path = checkpoint_path.replace(".pth", "_old_version.pth") + checkpoint_data = replace_keys_in_dict( replace_keys_in_dict( - replace_keys_in_dict( - checkpoint, ".parametrizations.weight.original1", ".weight_v" - ), - ".parametrizations.weight.original0", - ".weight_g", + checkpoint_data, ".parametrizations.weight.original1", ".weight_v" ), - checkpoint_old_version_path, + ".parametrizations.weight.original0", + ".weight_g", ) - os.remove(checkpoint_path) - os.rename(checkpoint_old_version_path, checkpoint_path) + torch.save(checkpoint_data, old_version_path) + + os.replace(old_version_path, checkpoint_path) + print(f"Saved model '{checkpoint_path}' (epoch {iteration})") def summarize( @@ -152,16 +129,16 @@ def summarize( audio_sample_rate=22050, ): """ - Summarizes training statistics and logs them to a TensorBoard writer. + Log various summaries to a TensorBoard writer. Args: writer (SummaryWriter): The TensorBoard writer. global_step (int): The current global step. 
- scalars (dict, optional): Dictionary of scalar values to log. Defaults to {}. - histograms (dict, optional): Dictionary of histogram values to log. Defaults to {}. - images (dict, optional): Dictionary of image values to log. Defaults to {}. - audios (dict, optional): Dictionary of audio values to log. Defaults to {}. - audio_sample_rate (int, optional): Sampling rate of the audio data. Defaults to 22050. + scalars (dict, optional): Dictionary of scalar values to log. + histograms (dict, optional): Dictionary of histogram values to log. + images (dict, optional): Dictionary of image values to log. + audios (dict, optional): Dictionary of audio values to log. + audio_sample_rate (int, optional): Sampling rate of the audio data. """ for k, v in scalars.items(): writer.add_scalar(k, v, global_step) @@ -175,30 +152,29 @@ def summarize( def latest_checkpoint_path(dir_path, regex="G_*.pth"): """ - Returns the path to the latest checkpoint file in a directory. + Get the latest checkpoint file in a directory. Args: dir_path (str): The directory to search for checkpoints. - regex (str, optional): The regular expression to match checkpoint files. Defaults to "G_*.pth". + regex (str, optional): The regular expression to match checkpoint files. """ - f_list = glob.glob(os.path.join(dir_path, regex)) - f_list.sort(key=lambda f: int("".join(filter(str.isdigit, f)))) - x = f_list[-1] - return x + checkpoints = sorted( + glob.glob(os.path.join(dir_path, regex)), + key=lambda f: int("".join(filter(str.isdigit, f))), + ) + return checkpoints[-1] if checkpoints else None def plot_spectrogram_to_numpy(spectrogram): """ - Plots a spectrogram to a NumPy array. + Convert a spectrogram to a NumPy array for visualization. Args: spectrogram (numpy.ndarray): The spectrogram to plot. 
""" global MATPLOTLIB_FLAG if not MATPLOTLIB_FLAG: - import matplotlib - - matplotlib.use("Agg") + plt.switch_backend("Agg") MATPLOTLIB_FLAG = True fig, ax = plt.subplots(figsize=(10, 2)) @@ -209,15 +185,15 @@ def plot_spectrogram_to_numpy(spectrogram): plt.tight_layout() fig.canvas.draw() - data = np.fromstring(fig.canvas.tostring_rgb(), dtype=np.uint8, sep="") + data = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8) data = data.reshape(fig.canvas.get_width_height()[::-1] + (3,)) - plt.close() + plt.close(fig) return data def load_wav_to_torch(full_path): """ - Loads a WAV file into a PyTorch tensor. + Load a WAV file into a PyTorch tensor. Args: full_path (str): The path to the WAV file. @@ -228,81 +204,45 @@ def load_wav_to_torch(full_path): def load_filepaths_and_text(filename, split="|"): """ - Loads filepaths and text from a file. + Load filepaths and associated text from a file. Args: filename (str): The path to the file. - split (str, optional): The delimiter used to split the lines. Defaults to "|". + split (str, optional): The delimiter used to split the lines. """ with open(filename, encoding="utf-8") as f: - filepaths_and_text = [line.strip().split(split) for line in f] - return filepaths_and_text + return [line.strip().split(split) for line in f] class HParams: """ A class for storing and accessing hyperparameters. - - Attributes: - **kwargs: Keyword arguments representing hyperparameters and their values. """ def __init__(self, **kwargs): - """ - Initializes an HParams object. - - Args: - **kwargs: Keyword arguments representing hyperparameters and their values. - """ for k, v in kwargs.items(): - if type(v) == dict: - v = HParams(**v) - self[k] = v + self[k] = HParams(**v) if isinstance(v, dict) else v def keys(self): - """ - Returns a list of hyperparameter keys. - """ return self.__dict__.keys() def items(self): - """ - Returns a list of (key, value) pairs for each hyperparameter. 
- """ return self.__dict__.items() def values(self): - """ - Returns a list of hyperparameter values. - """ return self.__dict__.values() def __len__(self): - """ - Returns the number of hyperparameters. - """ return len(self.__dict__) def __getitem__(self, key): - """ - Gets the value of a hyperparameter. - """ - return getattr(self, key) + return self.__dict__[key] def __setitem__(self, key, value): - """ - Sets the value of a hyperparameter. - """ - return setattr(self, key, value) + self.__dict__[key] = value def __contains__(self, key): - """ - Checks if a hyperparameter key exists. - """ return key in self.__dict__ def __repr__(self): - """ - Returns a string representation of the HParams object. - """ - return self.__dict__.__repr__() + return repr(self.__dict__) diff --git a/tabs/download/download.py b/tabs/download/download.py index bc89b647caa76d1c20c86031822e96095bb31468..50f1255d7f8a9f2efd4bf2bfe399f938c31ab3ee 100644 --- a/tabs/download/download.py +++ b/tabs/download/download.py @@ -4,6 +4,7 @@ import gradio as gr import pandas as pd import requests import wget +import json from core import run_download_script from assets.i18n.i18n import I18nAuto @@ -72,9 +73,39 @@ json_url = "https://huggingface.co/IAHispano/Applio/raw/main/pretrains.json" def fetch_pretrained_data(): - response = requests.get(json_url) - response.raise_for_status() - return response.json() + pretraineds_custom_path = os.path.join( + "rvc", "models", "pretraineds", "pretraineds_custom" + ) + os.makedirs(pretraineds_custom_path, exist_ok=True) + try: + with open( + os.path.join(pretraineds_custom_path, json_url.split("/")[-1]), "r" + ) as f: + data = json.load(f) + except: + try: + response = requests.get(json_url) + response.raise_for_status() + data = response.json() + with open( + os.path.join(pretraineds_custom_path, json_url.split("/")[-1]), + "w", + encoding="utf-8", + ) as f: + json.dump( + data, + f, + indent=2, + separators=(",", ": "), + ensure_ascii=False, + ) + except: + 
data = { + "Titan": { + "32k": {"D": "null", "G": "null"}, + }, + } + return data def get_pretrained_list(): @@ -90,7 +121,9 @@ def get_pretrained_sample_rates(model): def download_pretrained_model(model, sample_rate): data = fetch_pretrained_data() paths = data[model][sample_rate] - pretraineds_custom_path = os.path.join("rvc", "pretraineds", "pretraineds_custom") + pretraineds_custom_path = os.path.join( + "rvc", "models", "pretraineds", "pretraineds_custom" + ) os.makedirs(pretraineds_custom_path, exist_ok=True) d_url = f"https://huggingface.co/{paths['D']}" diff --git a/tabs/extra/extra.py b/tabs/extra/extra.py index 40bf778a55eabd409d16724b3ec952a75b96117c..49d063d4a1af9f971ab4c8fb5f6e3abfd194dff5 100644 --- a/tabs/extra/extra.py +++ b/tabs/extra/extra.py @@ -9,6 +9,7 @@ sys.path.append(now_dir) from tabs.extra.processing.processing import processing_tab from tabs.extra.analyzer.analyzer import analyzer_tab from tabs.extra.f0_extractor.f0_extractor import f0_extractor_tab +from tabs.extra.model_author.model_author import model_author_tab from assets.i18n.i18n import I18nAuto @@ -21,12 +22,14 @@ def extra_tab(): "This section contains some extra utilities that often may be in experimental phases." 
) ) + with gr.TabItem(i18n("Model information")): + processing_tab() with gr.TabItem(i18n("F0 Curve")): f0_extractor_tab() - with gr.TabItem(i18n("Processing")): - processing_tab() - with gr.TabItem(i18n("Audio Analyzer")): analyzer_tab() + + with gr.TabItem(i18n("Model Author")): + model_author_tab() diff --git a/tabs/extra/model_author/model_author.py b/tabs/extra/model_author/model_author.py new file mode 100644 index 0000000000000000000000000000000000000000..9ee8e11bd845933bd4535dc4eafd7815673a2b0e --- /dev/null +++ b/tabs/extra/model_author/model_author.py @@ -0,0 +1,34 @@ +import os, sys + +now_dir = os.getcwd() +sys.path.append(now_dir) + +import gradio as gr +from assets.i18n.i18n import I18nAuto + +from core import run_model_author_script + +i18n = I18nAuto() + + +def model_author_tab(): + model_author_name = gr.Textbox( + label=i18n("Model Author Name"), + info=i18n("The name that will appear in the model information."), + placeholder=i18n("Enter your nickname"), + interactive=True, + ) + model_author_output_info = gr.Textbox( + label=i18n("Output Information"), + info=i18n("The output information will be displayed here."), + value="", + max_lines=1, + ) + button = gr.Button(i18n("Set name"), variant="primary") + + button.click( + fn=run_model_author_script, + inputs=[model_author_name], + outputs=[model_author_output_info], + api_name="model_author", + ) diff --git a/tabs/extra/model_information.py b/tabs/extra/model_information.py index 27e8f77e1ab940d97962b7011a56202dbf44f284..de0fc73d22be343e12f4fef994300bca89f3daa0 100644 --- a/tabs/extra/model_information.py +++ b/tabs/extra/model_information.py @@ -18,7 +18,7 @@ def model_information_tab(): label=i18n("Output Information"), info=i18n("The output information will be displayed here."), value="", - max_lines=8, + max_lines=11, interactive=False, ) model_information_button = gr.Button(i18n("See Model Information")) diff --git a/tabs/extra/processing/processing.py b/tabs/extra/processing/processing.py 
index dbc9e7cbd4f2073adfcf73b5491ae0ef60315cba..1eb037ac14e92cf25fec0e605fcd8784f7d80176 100644 --- a/tabs/extra/processing/processing.py +++ b/tabs/extra/processing/processing.py @@ -27,7 +27,7 @@ def processing_tab(): label=i18n("Output Information"), info=i18n("The output information will be displayed here."), value="", - max_lines=8, + max_lines=11, ) model_view_button = gr.Button(i18n("View"), variant="primary") model_view_button.click( diff --git a/tabs/inference/inference.py b/tabs/inference/inference.py index e221fe3601e2d369caf7f7f23cd4d4f6d0e36e1c..7544c6be993691e08c61dc7dac0cbbc1ffde8ddf 100644 --- a/tabs/inference/inference.py +++ b/tabs/inference/inference.py @@ -3,6 +3,7 @@ import gradio as gr import regex as re import shutil import datetime +import json from core import ( run_infer_script, @@ -12,6 +13,7 @@ from core import ( from assets.i18n.i18n import I18nAuto from rvc.lib.utils import format_title +from tabs.settings.restart import stop_infer i18n = I18nAuto() @@ -24,6 +26,9 @@ custom_embedder_root = os.path.join( now_dir, "rvc", "models", "embedders", "embedders_custom" ) +PRESETS_DIR = os.path.join(now_dir, "assets", "presets") +FORMANTSHIFT_DIR = os.path.join(now_dir, "assets", "formant_shift") + os.makedirs(custom_embedder_root, exist_ok=True) custom_embedder_root_relative = os.path.relpath(custom_embedder_root, now_dir) @@ -73,13 +78,92 @@ audio_paths = [ ] custom_embedders = [ - os.path.join(dirpath, filename) - for dirpath, _, filenames in os.walk(custom_embedder_root_relative) - for filename in filenames - if filename.endswith(".pt") + os.path.join(dirpath, dirname) + for dirpath, dirnames, _ in os.walk(custom_embedder_root_relative) + for dirname in dirnames ] +def update_sliders(preset): + with open( + os.path.join(PRESETS_DIR, f"{preset}.json"), "r", encoding="utf-8" + ) as json_file: + values = json.load(json_file) + return ( + values["pitch"], + values["filter_radius"], + values["index_rate"], + values["rms_mix_rate"], + 
values["protect"], + ) + + +def update_sliders_formant(preset): + with open( + os.path.join(FORMANTSHIFT_DIR, f"{preset}.json"), "r", encoding="utf-8" + ) as json_file: + values = json.load(json_file) + return ( + values["formant_qfrency"], + values["formant_timbre"], + ) + + +def export_presets(presets, file_path): + with open(file_path, "w", encoding="utf-8") as json_file: + json.dump(presets, json_file, ensure_ascii=False, indent=4) + + +def import_presets(file_path): + with open(file_path, "r", encoding="utf-8") as json_file: + presets = json.load(json_file) + return presets + + +def get_presets_data(pitch, filter_radius, index_rate, rms_mix_rate, protect): + return { + "pitch": pitch, + "filter_radius": filter_radius, + "index_rate": index_rate, + "rms_mix_rate": rms_mix_rate, + "protect": protect, + } + + +def export_presets_button( + preset_name, pitch, filter_radius, index_rate, rms_mix_rate, protect +): + if preset_name: + file_path = os.path.join(PRESETS_DIR, f"{preset_name}.json") + presets_data = get_presets_data( + pitch, filter_radius, index_rate, rms_mix_rate, protect + ) + with open(file_path, "w", encoding="utf-8") as json_file: + json.dump(presets_data, json_file, ensure_ascii=False, indent=4) + return "Export successful" + return "Export cancelled" + + +def import_presets_button(file_path): + if file_path: + imported_presets = import_presets(file_path.name) + return ( + list(imported_presets.keys()), + imported_presets, + "Presets imported successfully!", + ) + return [], {}, "No file selected for import." 
+ + +def list_json_files(directory): + return [f.rsplit(".", 1)[0] for f in os.listdir(directory) if f.endswith(".json")] + + +def refresh_presets(): + json_files = list_json_files(PRESETS_DIR) + return gr.update(choices=json_files) + + def output_path_fn(input_audio_path): original_name_without_extension = os.path.basename(input_audio_path).rsplit(".", 1)[ 0 @@ -116,19 +200,10 @@ def change_choices(): and "_output" not in name ] - custom_embedder = [ - os.path.join(dirpath, filename) - for dirpath, _, filenames in os.walk(custom_embedder_root_relative) - for filename in filenames - if filename.endswith(".pt") - ] - return ( {"choices": sorted(names), "__type__": "update"}, {"choices": sorted(indexes_list), "__type__": "update"}, {"choices": sorted(audio_paths), "__type__": "update"}, - {"choices": sorted(custom_embedder), "__type__": "update"}, - {"choices": sorted(custom_embedder), "__type__": "update"}, ) @@ -192,23 +267,36 @@ def match_index(model_file_value): return "" -def save_drop_custom_embedder(dropbox): - if ".pt" not in dropbox: - gr.Info( - i18n("The file you dropped is not a valid embedder file. Please try again.") - ) - else: - file_name = os.path.basename(dropbox) - custom_embedder_path = os.path.join(custom_embedder_root, file_name) - if os.path.exists(custom_embedder_path): - os.remove(custom_embedder_path) - os.rename(dropbox, custom_embedder_path) - gr.Info( - i18n( - "Click the refresh button to see the embedder file in the dropdown menu." - ) - ) - return None +def create_folder_and_move_files(folder_name, bin_file, config_file): + if not folder_name: + return "Folder name must not be empty." 
+ + folder_name = os.path.join(custom_embedder_root, folder_name) + os.makedirs(folder_name, exist_ok=True) + + if bin_file: + bin_file_path = os.path.join(folder_name, os.path.basename(bin_file)) + shutil.copy(bin_file, bin_file_path) + + if config_file: + config_file_path = os.path.join(folder_name, os.path.basename(config_file)) + shutil.copy(config_file, config_file_path) + + return f"Files moved to folder {folder_name}" + + +def refresh_formant(): + json_files = list_json_files(FORMANTSHIFT_DIR) + return gr.update(choices=json_files) + + +def refresh_embedders_folders(): + custom_embedders = [ + os.path.join(dirpath, dirname) + for dirpath, dirnames, _ in os.walk(custom_embedder_root_relative) + for dirname in dirnames + ] + return custom_embedders # Inference tab @@ -340,6 +428,393 @@ def inference_tab(): value=False, interactive=True, ) + formant_shifting = gr.Checkbox( + label=i18n("Formant Shifting"), + info=i18n( + "Enable formant shifting. Used for male to female and vice-versa convertions." 
+ ), + value=False, + visible=True, + interactive=True, + ) + with gr.Row(): + formant_preset = gr.Dropdown( + label=i18n("Browse presets for formanting"), + info=i18n( + "Presets are located in /assets/formant_shift folder" + ), + choices=list_json_files(FORMANTSHIFT_DIR), + visible=False, + interactive=True, + ) + formant_refresh_button = gr.Button( + value="Refresh", + visible=False, + ) + formant_qfrency = gr.Slider( + value=1.0, + info=i18n("Default value is 1.0"), + label=i18n("Quefrency for formant shifting"), + minimum=0.0, + maximum=16.0, + step=0.1, + visible=False, + interactive=True, + ) + formant_timbre = gr.Slider( + value=1.0, + info=i18n("Default value is 1.0"), + label=i18n("Timbre for formant shifting"), + minimum=0.0, + maximum=16.0, + step=0.1, + visible=False, + interactive=True, + ) + post_process = gr.Checkbox( + label=i18n("Post-Process"), + info=i18n("Post-process the audio to apply effects to the output."), + value=False, + interactive=True, + ) + reverb = gr.Checkbox( + label=i18n("Reverb"), + info=i18n("Apply reverb to the audio."), + value=False, + interactive=True, + visible=False, + ) + reverb_room_size = gr.Slider( + minimum=0, + maximum=1, + label=i18n("Reverb Room Size"), + info=i18n("Set the room size of the reverb."), + value=0.5, + interactive=True, + visible=False, + ) + + reverb_damping = gr.Slider( + minimum=0, + maximum=1, + label=i18n("Reverb Damping"), + info=i18n("Set the damping of the reverb."), + value=0.5, + interactive=True, + visible=False, + ) + + reverb_wet_gain = gr.Slider( + minimum=0, + maximum=1, + label=i18n("Reverb Wet Gain"), + info=i18n("Set the wet gain of the reverb."), + value=0.33, + interactive=True, + visible=False, + ) + + reverb_dry_gain = gr.Slider( + minimum=0, + maximum=1, + label=i18n("Reverb Dry Gain"), + info=i18n("Set the dry gain of the reverb."), + value=0.4, + interactive=True, + visible=False, + ) + + reverb_width = gr.Slider( + minimum=0, + maximum=1, + label=i18n("Reverb Width"), + 
info=i18n("Set the width of the reverb."), + value=1.0, + interactive=True, + visible=False, + ) + + reverb_freeze_mode = gr.Slider( + minimum=0, + maximum=1, + label=i18n("Reverb Freeze Mode"), + info=i18n("Set the freeze mode of the reverb."), + value=0.0, + interactive=True, + visible=False, + ) + pitch_shift = gr.Checkbox( + label=i18n("Pitch Shift"), + info=i18n("Apply pitch shift to the audio."), + value=False, + interactive=True, + visible=False, + ) + pitch_shift_semitones = gr.Slider( + minimum=-12, + maximum=12, + label=i18n("Pitch Shift Semitones"), + info=i18n("Set the pitch shift semitones."), + value=0, + interactive=True, + visible=False, + ) + limiter = gr.Checkbox( + label=i18n("Limiter"), + info=i18n("Apply limiter to the audio."), + value=False, + interactive=True, + visible=False, + ) + limiter_threshold = gr.Slider( + minimum=-60, + maximum=0, + label=i18n("Limiter Threshold dB"), + info=i18n("Set the limiter threshold dB."), + value=-6, + interactive=True, + visible=False, + ) + + limiter_release_time = gr.Slider( + minimum=0.01, + maximum=1, + label=i18n("Limiter Release Time"), + info=i18n("Set the limiter release time."), + value=0.05, + interactive=True, + visible=False, + ) + gain = gr.Checkbox( + label=i18n("Gain"), + info=i18n("Apply gain to the audio."), + value=False, + interactive=True, + visible=False, + ) + gain_db = gr.Slider( + minimum=-60, + maximum=60, + label=i18n("Gain dB"), + info=i18n("Set the gain dB."), + value=0, + interactive=True, + visible=False, + ) + distortion = gr.Checkbox( + label=i18n("Distortion"), + info=i18n("Apply distortion to the audio."), + value=False, + interactive=True, + visible=False, + ) + distortion_gain = gr.Slider( + minimum=-60, + maximum=60, + label=i18n("Distortion Gain"), + info=i18n("Set the distortion gain."), + value=25, + interactive=True, + visible=False, + ) + chorus = gr.Checkbox( + label=i18n("chorus"), + info=i18n("Apply chorus to the audio."), + value=False, + interactive=True, + 
visible=False, + ) + chorus_rate = gr.Slider( + minimum=0, + maximum=100, + label=i18n("Chorus Rate Hz"), + info=i18n("Set the chorus rate Hz."), + value=1.0, + interactive=True, + visible=False, + ) + + chorus_depth = gr.Slider( + minimum=0, + maximum=1, + label=i18n("chorus Depth"), + info=i18n("Set the chorus depth."), + value=0.25, + interactive=True, + visible=False, + ) + + chorus_center_delay = gr.Slider( + minimum=7, + maximum=8, + label=i18n("chorus Center Delay ms"), + info=i18n("Set the chorus center delay ms."), + value=7, + interactive=True, + visible=False, + ) + + chorus_feedback = gr.Slider( + minimum=0, + maximum=1, + label=i18n("chorus Feedback"), + info=i18n("Set the chorus feedback."), + value=0.0, + interactive=True, + visible=False, + ) + + chorus_mix = gr.Slider( + minimum=0, + maximum=1, + label=i18n("Chorus Mix"), + info=i18n("Set the chorus mix."), + value=0.5, + interactive=True, + visible=False, + ) + bitcrush = gr.Checkbox( + label=i18n("Bitcrush"), + info=i18n("Apply bitcrush to the audio."), + value=False, + interactive=True, + visible=False, + ) + bitcrush_bit_depth = gr.Slider( + minimum=1, + maximum=32, + label=i18n("Bitcrush Bit Depth"), + info=i18n("Set the bitcrush bit depth."), + value=8, + interactive=True, + visible=False, + ) + clipping = gr.Checkbox( + label=i18n("Clipping"), + info=i18n("Apply clipping to the audio."), + value=False, + interactive=True, + visible=False, + ) + clipping_threshold = gr.Slider( + minimum=-60, + maximum=0, + label=i18n("Clipping Threshold"), + info=i18n("Set the clipping threshold."), + value=-6, + interactive=True, + visible=False, + ) + compressor = gr.Checkbox( + label=i18n("Compressor"), + info=i18n("Apply compressor to the audio."), + value=False, + interactive=True, + visible=False, + ) + compressor_threshold = gr.Slider( + minimum=-60, + maximum=0, + label=i18n("Compressor Threshold dB"), + info=i18n("Set the compressor threshold dB."), + value=0, + interactive=True, + visible=False, + ) 
+ + compressor_ratio = gr.Slider( + minimum=1, + maximum=20, + label=i18n("Compressor Ratio"), + info=i18n("Set the compressor ratio."), + value=1, + interactive=True, + visible=False, + ) + + compressor_attack = gr.Slider( + minimum=0.0, + maximum=100, + label=i18n("Compressor Attack ms"), + info=i18n("Set the compressor attack ms."), + value=1.0, + interactive=True, + visible=False, + ) + + compressor_release = gr.Slider( + minimum=0.01, + maximum=100, + label=i18n("Compressor Release ms"), + info=i18n("Set the compressor release ms."), + value=100, + interactive=True, + visible=False, + ) + delay = gr.Checkbox( + label=i18n("Delay"), + info=i18n("Apply delay to the audio."), + value=False, + interactive=True, + visible=False, + ) + delay_seconds = gr.Slider( + minimum=0.0, + maximum=5.0, + label=i18n("Delay Seconds"), + info=i18n("Set the delay seconds."), + value=0.5, + interactive=True, + visible=False, + ) + + delay_feedback = gr.Slider( + minimum=0.0, + maximum=1.0, + label=i18n("Delay Feedback"), + info=i18n("Set the delay feedback."), + value=0.0, + interactive=True, + visible=False, + ) + + delay_mix = gr.Slider( + minimum=0.0, + maximum=1.0, + label=i18n("Delay Mix"), + info=i18n("Set the delay mix."), + value=0.5, + interactive=True, + visible=False, + ) + with gr.Accordion(i18n("Preset Settings"), open=False): + with gr.Row(): + preset_dropdown = gr.Dropdown( + label=i18n("Select Custom Preset"), + choices=list_json_files(PRESETS_DIR), + interactive=True, + ) + presets_refresh_button = gr.Button(i18n("Refresh Presets")) + import_file = gr.File( + label=i18n("Select file to import"), + file_count="single", + type="filepath", + interactive=True, + ) + import_file.change( + import_presets_button, + inputs=import_file, + outputs=[preset_dropdown], + ) + presets_refresh_button.click( + refresh_presets, outputs=preset_dropdown + ) + with gr.Row(): + preset_name_input = gr.Textbox( + label=i18n("Preset Name"), + placeholder=i18n("Enter preset name"), + ) + 
export_button = gr.Button(i18n("Export Preset")) pitch = gr.Slider( minimum=-24, maximum=24, @@ -392,6 +867,28 @@ def inference_tab(): value=0.5, interactive=True, ) + preset_dropdown.change( + update_sliders, + inputs=preset_dropdown, + outputs=[ + pitch, + filter_radius, + index_rate, + rms_mix_rate, + protect, + ], + ) + export_button.click( + export_presets_button, + inputs=[ + preset_name_input, + pitch, + filter_radius, + index_rate, + rms_mix_rate, + protect, + ], + ) hop_length = gr.Slider( minimum=1, maximum=512, @@ -424,8 +921,9 @@ def inference_tab(): info=i18n("Model used for learning speaker embedding."), choices=[ "contentvec", + "chinese-hubert-base", "japanese-hubert-base", - "chinese-hubert-large", + "korean-hubert-base", "custom", ], value="contentvec", @@ -433,21 +931,34 @@ def inference_tab(): ) with gr.Column(visible=False) as embedder_custom: with gr.Accordion(i18n("Custom Embedder"), open=True): - embedder_upload_custom = gr.File( - label=i18n("Upload Custom Embedder"), - type="filepath", - interactive=True, + with gr.Row(): + embedder_model_custom = gr.Dropdown( + label=i18n("Select Custom Embedder"), + choices=refresh_embedders_folders(), + interactive=True, + allow_custom_value=True, + ) + refresh_embedders_button = gr.Button( + i18n("Refresh embedders") + ) + folder_name_input = gr.Textbox( + label=i18n("Folder Name"), interactive=True ) - embedder_custom_refresh = gr.Button(i18n("Refresh")) - embedder_model_custom = gr.Dropdown( - label=i18n("Custom Embedder"), - info=i18n( - "Select the custom embedder to use for the conversion." 
- ), - choices=sorted(custom_embedders), - interactive=True, - allow_custom_value=True, + with gr.Row(): + bin_file_upload = gr.File( + label=i18n("Upload .bin"), + type="filepath", + interactive=True, + ) + config_file_upload = gr.File( + label=i18n("Upload .json"), + type="filepath", + interactive=True, + ) + move_files_button = gr.Button( + i18n("Move files to custom embedder folder") ) + f0_file = gr.File( label=i18n( "The f0 curve represents the variations in the base frequency of a voice over time, showing how pitch rises and falls." @@ -543,36 +1054,425 @@ def inference_tab(): value=False, interactive=True, ) - pitch_batch = gr.Slider( - minimum=-24, - maximum=24, - step=1, - label=i18n("Pitch"), + formant_shifting_batch = gr.Checkbox( + label=i18n("Formant Shifting"), info=i18n( - "Set the pitch of the audio, the higher the value, the higher the pitch." + "Enable formant shifting. Used for male to female and vice-versa convertions." ), - value=0, + value=False, + visible=True, interactive=True, ) - filter_radius_batch = gr.Slider( - minimum=0, - maximum=7, - label=i18n("Filter Radius"), - info=i18n( - "If the number is greater than or equal to three, employing median filtering on the collected tone results has the potential to decrease respiration." 
- ), - value=3, - step=1, + with gr.Row(): + formant_preset_batch = gr.Dropdown( + label=i18n("Browse presets for formanting"), + info=i18n( + "Presets are located in /assets/formant_shift folder" + ), + choices=list_json_files(FORMANTSHIFT_DIR), + visible=False, + interactive=True, + ) + formant_refresh_button_batch = gr.Button( + value="Refresh", + visible=False, + variant="primary", + ) + formant_qfrency_batch = gr.Slider( + value=1.0, + info=i18n("Default value is 1.0"), + label=i18n("Quefrency for formant shifting"), + minimum=0.0, + maximum=16.0, + step=0.1, + visible=False, interactive=True, ) - index_rate_batch = gr.Slider( - minimum=0, - maximum=1, - label=i18n("Search Feature Ratio"), - info=i18n( - "Influence exerted by the index file; a higher value corresponds to greater influence. However, opting for lower values can help mitigate artifacts present in the audio." - ), - value=0.75, + formant_timbre_batch = gr.Slider( + value=1.0, + info=i18n("Default value is 1.0"), + label=i18n("Timbre for formant shifting"), + minimum=0.0, + maximum=16.0, + step=0.1, + visible=False, + interactive=True, + ) + post_process_batch = gr.Checkbox( + label=i18n("Post-Process"), + info=i18n("Post-process the audio to apply effects to the output."), + value=False, + interactive=True, + ) + reverb_batch = gr.Checkbox( + label=i18n("Reverb"), + info=i18n("Apply reverb to the audio."), + value=False, + interactive=True, + visible=False, + ) + reverb_room_size_batch = gr.Slider( + minimum=0, + maximum=1, + label=i18n("Reverb Room Size"), + info=i18n("Set the room size of the reverb."), + value=0.5, + interactive=True, + visible=False, + ) + + reverb_damping_batch = gr.Slider( + minimum=0, + maximum=1, + label=i18n("Reverb Damping"), + info=i18n("Set the damping of the reverb."), + value=0.5, + interactive=True, + visible=False, + ) + + reverb_wet_gain_batch = gr.Slider( + minimum=0, + maximum=1, + label=i18n("Reverb Wet Gain"), + info=i18n("Set the wet gain of the reverb."), + 
value=0.33, + interactive=True, + visible=False, + ) + + reverb_dry_gain_batch = gr.Slider( + minimum=0, + maximum=1, + label=i18n("Reverb Dry Gain"), + info=i18n("Set the dry gain of the reverb."), + value=0.4, + interactive=True, + visible=False, + ) + + reverb_width_batch = gr.Slider( + minimum=0, + maximum=1, + label=i18n("Reverb Width"), + info=i18n("Set the width of the reverb."), + value=1.0, + interactive=True, + visible=False, + ) + + reverb_freeze_mode_batch = gr.Slider( + minimum=0, + maximum=1, + label=i18n("Reverb Freeze Mode"), + info=i18n("Set the freeze mode of the reverb."), + value=0.0, + interactive=True, + visible=False, + ) + pitch_shift_batch = gr.Checkbox( + label=i18n("Pitch Shift"), + info=i18n("Apply pitch shift to the audio."), + value=False, + interactive=True, + visible=False, + ) + pitch_shift_semitones_batch = gr.Slider( + minimum=-12, + maximum=12, + label=i18n("Pitch Shift Semitones"), + info=i18n("Set the pitch shift semitones."), + value=0, + interactive=True, + visible=False, + ) + limiter_batch = gr.Checkbox( + label=i18n("Limiter"), + info=i18n("Apply limiter to the audio."), + value=False, + interactive=True, + visible=False, + ) + limiter_threshold_batch = gr.Slider( + minimum=-60, + maximum=0, + label=i18n("Limiter Threshold dB"), + info=i18n("Set the limiter threshold dB."), + value=-6, + interactive=True, + visible=False, + ) + + limiter_release_time_batch = gr.Slider( + minimum=0.01, + maximum=1, + label=i18n("Limiter Release Time"), + info=i18n("Set the limiter release time."), + value=0.05, + interactive=True, + visible=False, + ) + gain_batch = gr.Checkbox( + label=i18n("Gain"), + info=i18n("Apply gain to the audio."), + value=False, + interactive=True, + visible=False, + ) + gain_db_batch = gr.Slider( + minimum=-60, + maximum=60, + label=i18n("Gain dB"), + info=i18n("Set the gain dB."), + value=0, + interactive=True, + visible=False, + ) + distortion_batch = gr.Checkbox( + label=i18n("Distortion"), + info=i18n("Apply 
distortion to the audio."), + value=False, + interactive=True, + visible=False, + ) + distortion_gain_batch = gr.Slider( + minimum=-60, + maximum=60, + label=i18n("Distortion Gain"), + info=i18n("Set the distortion gain."), + value=25, + interactive=True, + visible=False, + ) + chorus_batch = gr.Checkbox( + label=i18n("chorus"), + info=i18n("Apply chorus to the audio."), + value=False, + interactive=True, + visible=False, + ) + chorus_rate_batch = gr.Slider( + minimum=0, + maximum=100, + label=i18n("Chorus Rate Hz"), + info=i18n("Set the chorus rate Hz."), + value=1.0, + interactive=True, + visible=False, + ) + + chorus_depth_batch = gr.Slider( + minimum=0, + maximum=1, + label=i18n("chorus Depth"), + info=i18n("Set the chorus depth."), + value=0.25, + interactive=True, + visible=False, + ) + + chorus_center_delay_batch = gr.Slider( + minimum=7, + maximum=8, + label=i18n("chorus Center Delay ms"), + info=i18n("Set the chorus center delay ms."), + value=7, + interactive=True, + visible=False, + ) + + chorus_feedback_batch = gr.Slider( + minimum=0, + maximum=1, + label=i18n("chorus Feedback"), + info=i18n("Set the chorus feedback."), + value=0.0, + interactive=True, + visible=False, + ) + + chorus_mix_batch = gr.Slider( + minimum=0, + maximum=1, + label=i18n("Chorus Mix"), + info=i18n("Set the chorus mix."), + value=0.5, + interactive=True, + visible=False, + ) + bitcrush_batch = gr.Checkbox( + label=i18n("Bitcrush"), + info=i18n("Apply bitcrush to the audio."), + value=False, + interactive=True, + visible=False, + ) + bitcrush_bit_depth_batch = gr.Slider( + minimum=1, + maximum=32, + label=i18n("Bitcrush Bit Depth"), + info=i18n("Set the bitcrush bit depth."), + value=8, + interactive=True, + visible=False, + ) + clipping_batch = gr.Checkbox( + label=i18n("Clipping"), + info=i18n("Apply clipping to the audio."), + value=False, + interactive=True, + visible=False, + ) + clipping_threshold_batch = gr.Slider( + minimum=-60, + maximum=0, + label=i18n("Clipping 
Threshold"), + info=i18n("Set the clipping threshold."), + value=-6, + interactive=True, + visible=False, + ) + compressor_batch = gr.Checkbox( + label=i18n("Compressor"), + info=i18n("Apply compressor to the audio."), + value=False, + interactive=True, + visible=False, + ) + compressor_threshold_batch = gr.Slider( + minimum=-60, + maximum=0, + label=i18n("Compressor Threshold dB"), + info=i18n("Set the compressor threshold dB."), + value=0, + interactive=True, + visible=False, + ) + + compressor_ratio_batch = gr.Slider( + minimum=1, + maximum=20, + label=i18n("Compressor Ratio"), + info=i18n("Set the compressor ratio."), + value=1, + interactive=True, + visible=False, + ) + + compressor_attack_batch = gr.Slider( + minimum=0.0, + maximum=100, + label=i18n("Compressor Attack ms"), + info=i18n("Set the compressor attack ms."), + value=1.0, + interactive=True, + visible=False, + ) + + compressor_release_batch = gr.Slider( + minimum=0.01, + maximum=100, + label=i18n("Compressor Release ms"), + info=i18n("Set the compressor release ms."), + value=100, + interactive=True, + visible=False, + ) + delay_batch = gr.Checkbox( + label=i18n("Delay"), + info=i18n("Apply delay to the audio."), + value=False, + interactive=True, + visible=False, + ) + delay_seconds_batch = gr.Slider( + minimum=0.0, + maximum=5.0, + label=i18n("Delay Seconds"), + info=i18n("Set the delay seconds."), + value=0.5, + interactive=True, + visible=False, + ) + + delay_feedback_batch = gr.Slider( + minimum=0.0, + maximum=1.0, + label=i18n("Delay Feedback"), + info=i18n("Set the delay feedback."), + value=0.0, + interactive=True, + visible=False, + ) + + delay_mix_batch = gr.Slider( + minimum=0.0, + maximum=1.0, + label=i18n("Delay Mix"), + info=i18n("Set the delay mix."), + value=0.5, + interactive=True, + visible=False, + ) + with gr.Accordion(i18n("Preset Settings"), open=False): + with gr.Row(): + preset_dropdown = gr.Dropdown( + label=i18n("Select Custom Preset"), + interactive=True, + ) + 
presets_batch_refresh_button = gr.Button( + i18n("Refresh Presets") + ) + import_file = gr.File( + label=i18n("Select file to import"), + file_count="single", + type="filepath", + interactive=True, + ) + import_file.change( + import_presets_button, + inputs=import_file, + outputs=[preset_dropdown], + ) + presets_batch_refresh_button.click( + refresh_presets, outputs=preset_dropdown + ) + with gr.Row(): + preset_name_input = gr.Textbox( + label=i18n("Preset Name"), + placeholder=i18n("Enter preset name"), + ) + export_button = gr.Button(i18n("Export Preset")) + pitch_batch = gr.Slider( + minimum=-24, + maximum=24, + step=1, + label=i18n("Pitch"), + info=i18n( + "Set the pitch of the audio, the higher the value, the higher the pitch." + ), + value=0, + interactive=True, + ) + filter_radius_batch = gr.Slider( + minimum=0, + maximum=7, + label=i18n("Filter Radius"), + info=i18n( + "If the number is greater than or equal to three, employing median filtering on the collected tone results has the potential to decrease respiration." + ), + value=3, + step=1, + interactive=True, + ) + index_rate_batch = gr.Slider( + minimum=0, + maximum=1, + label=i18n("Search Feature Ratio"), + info=i18n( + "Influence exerted by the index file; a higher value corresponds to greater influence. However, opting for lower values can help mitigate artifacts present in the audio." 
+ ), + value=0.75, interactive=True, ) rms_mix_rate_batch = gr.Slider( @@ -595,6 +1495,29 @@ def inference_tab(): value=0.5, interactive=True, ) + preset_dropdown.change( + update_sliders, + inputs=preset_dropdown, + outputs=[ + pitch_batch, + filter_radius_batch, + index_rate_batch, + rms_mix_rate_batch, + protect_batch, + ], + ) + export_button.click( + export_presets_button, + inputs=[ + preset_name_input, + pitch, + filter_radius, + index_rate, + rms_mix_rate, + protect, + ], + outputs=[], + ) hop_length_batch = gr.Slider( minimum=1, maximum=512, @@ -627,8 +1550,9 @@ def inference_tab(): info=i18n("Model used for learning speaker embedding."), choices=[ "contentvec", + "chinese-hubert-base", "japanese-hubert-base", - "chinese-hubert-large", + "korean-hubert-base", "custom", ], value="contentvec", @@ -642,23 +1566,37 @@ def inference_tab(): ) with gr.Column(visible=False) as embedder_custom_batch: with gr.Accordion(i18n("Custom Embedder"), open=True): - embedder_upload_custom_batch = gr.File( - label=i18n("Upload Custom Embedder"), - type="filepath", - interactive=True, + with gr.Row(): + embedder_model_custom_batch = gr.Dropdown( + label=i18n("Select Custom Embedder"), + choices=refresh_embedders_folders(), + interactive=True, + allow_custom_value=True, + ) + refresh_embedders_button_batch = gr.Button( + i18n("Refresh embedders") + ) + folder_name_input_batch = gr.Textbox( + label=i18n("Folder Name"), interactive=True ) - embedder_custom_refresh_batch = gr.Button(i18n("Refresh")) - embedder_model_custom_batch = gr.Dropdown( - label=i18n("Custom Embedder"), - info=i18n( - "Select the custom embedder to use for the conversion." 
- ), - choices=sorted(custom_embedders), - interactive=True, - allow_custom_value=True, + with gr.Row(): + bin_file_upload_batch = gr.File( + label=i18n("Upload .bin"), + type="filepath", + interactive=True, + ) + config_file_upload_batch = gr.File( + label=i18n("Upload .json"), + type="filepath", + interactive=True, + ) + move_files_button_batch = gr.Button( + i18n("Move files to custom embedder folder") ) convert_button2 = gr.Button(i18n("Convert")) + stop_button = gr.Button(i18n("Stop convert"), visible=False) + stop_button.click(fn=stop_infer, inputs=[], outputs=[]) with gr.Row(): vc_output3 = gr.Textbox( @@ -679,11 +1617,276 @@ def inference_tab(): return {"visible": True, "__type__": "update"} return {"visible": False, "__type__": "update"} + def enable_stop_convert_button(): + return {"visible": False, "__type__": "update"}, { + "visible": True, + "__type__": "update", + } + + def disable_stop_convert_button(): + return {"visible": True, "__type__": "update"}, { + "visible": False, + "__type__": "update", + } + + def toggle_visible_formant_shifting(checkbox): + if checkbox: + return ( + gr.update(visible=True), + gr.update(visible=True), + gr.update(visible=True), + gr.update(visible=True), + ) + else: + return ( + gr.update(visible=False), + gr.update(visible=False), + gr.update(visible=False), + gr.update(visible=False), + ) + + def update_visibility(checkbox, count): + return [gr.update(visible=checkbox) for _ in range(count)] + + def post_process_visible(checkbox): + return update_visibility(checkbox, 11) + + def reverb_visible(checkbox): + return update_visibility(checkbox, 6) + + def limiter_visible(checkbox): + return update_visibility(checkbox, 2) + + def chorus_visible(checkbox): + return update_visibility(checkbox, 6) + + def bitcrush_visible(checkbox): + return update_visibility(checkbox, 1) + + def compress_visible(checkbox): + return update_visibility(checkbox, 4) + + def delay_visible(checkbox): + return update_visibility(checkbox, 3) + 
clean_audio.change( fn=toggle_visible, inputs=[clean_audio], outputs=[clean_strength], ) + formant_shifting.change( + fn=toggle_visible_formant_shifting, + inputs=[formant_shifting], + outputs=[ + formant_preset, + formant_refresh_button, + formant_qfrency, + formant_timbre, + ], + ) + formant_shifting_batch.change( + fn=toggle_visible_formant_shifting, + inputs=[formant_shifting], + outputs=[ + formant_preset_batch, + formant_refresh_button_batch, + formant_qfrency_batch, + formant_timbre_batch, + ], + ) + formant_refresh_button.click( + fn=refresh_formant, + inputs=[], + outputs=[formant_preset], + ) + formant_preset.change( + fn=update_sliders_formant, + inputs=[formant_preset], + outputs=[ + formant_qfrency, + formant_timbre, + ], + ) + formant_preset_batch.change( + fn=update_sliders_formant, + inputs=[formant_preset_batch], + outputs=[ + formant_qfrency, + formant_timbre, + ], + ) + post_process.change( + fn=post_process_visible, + inputs=[post_process], + outputs=[ + reverb, + pitch_shift, + limiter, + gain, + distortion, + chorus, + bitcrush, + clipping, + compressor, + delay, + clean_audio, + ], + ) + + reverb.change( + fn=reverb_visible, + inputs=[reverb], + outputs=[ + reverb_room_size, + reverb_damping, + reverb_wet_gain, + reverb_dry_gain, + reverb_width, + reverb_freeze_mode, + ], + ) + pitch_shift.change( + fn=toggle_visible, + inputs=[pitch_shift], + outputs=[pitch_shift_semitones], + ) + limiter.change( + fn=limiter_visible, + inputs=[limiter], + outputs=[limiter_threshold, limiter_release_time], + ) + gain.change( + fn=toggle_visible, + inputs=[gain], + outputs=[gain_db], + ) + distortion.change( + fn=toggle_visible, + inputs=[distortion], + outputs=[distortion_gain], + ) + chorus.change( + fn=chorus_visible, + inputs=[chorus], + outputs=[ + chorus_rate, + chorus_depth, + chorus_center_delay, + chorus_feedback, + chorus_mix, + ], + ) + bitcrush.change( + fn=bitcrush_visible, + inputs=[bitcrush], + outputs=[bitcrush_bit_depth], + ) + 
clipping.change( + fn=toggle_visible, + inputs=[clipping], + outputs=[clipping_threshold], + ) + compressor.change( + fn=compress_visible, + inputs=[compressor], + outputs=[ + compressor_threshold, + compressor_ratio, + compressor_attack, + compressor_release, + ], + ) + delay.change( + fn=delay_visible, + inputs=[delay], + outputs=[delay_seconds, delay_feedback, delay_mix], + ) + post_process_batch.change( + fn=post_process_visible, + inputs=[post_process_batch], + outputs=[ + reverb_batch, + pitch_shift_batch, + limiter_batch, + gain_batch, + distortion_batch, + chorus_batch, + bitcrush_batch, + clipping_batch, + compressor_batch, + delay_batch, + clean_audio_batch, + ], + ) + + reverb_batch.change( + fn=reverb_visible, + inputs=[reverb_batch], + outputs=[ + reverb_room_size_batch, + reverb_damping_batch, + reverb_wet_gain_batch, + reverb_dry_gain_batch, + reverb_width_batch, + reverb_freeze_mode_batch, + ], + ) + pitch_shift_batch.change( + fn=toggle_visible, + inputs=[pitch_shift_batch], + outputs=[pitch_shift_semitones_batch], + ) + limiter_batch.change( + fn=limiter_visible, + inputs=[limiter_batch], + outputs=[limiter_threshold_batch, limiter_release_time_batch], + ) + gain_batch.change( + fn=toggle_visible, + inputs=[gain_batch], + outputs=[gain_db_batch], + ) + distortion_batch.change( + fn=toggle_visible, + inputs=[distortion_batch], + outputs=[distortion_gain_batch], + ) + chorus_batch.change( + fn=chorus_visible, + inputs=[chorus_batch], + outputs=[ + chorus_rate_batch, + chorus_depth_batch, + chorus_center_delay_batch, + chorus_feedback_batch, + chorus_mix_batch, + ], + ) + bitcrush_batch.change( + fn=bitcrush_visible, + inputs=[bitcrush_batch], + outputs=[bitcrush_bit_depth_batch], + ) + clipping_batch.change( + fn=toggle_visible, + inputs=[clipping_batch], + outputs=[clipping_threshold_batch], + ) + compressor_batch.change( + fn=compress_visible, + inputs=[compressor_batch], + outputs=[ + compressor_threshold_batch, + compressor_ratio_batch, + 
compressor_attack_batch, + compressor_release_batch, + ], + ) + delay_batch.change( + fn=delay_visible, + inputs=[delay_batch], + outputs=[delay_seconds_batch, delay_feedback_batch, delay_mix_batch], + ) clean_audio_batch.change( fn=toggle_visible, inputs=[clean_audio_batch], @@ -706,8 +1909,6 @@ def inference_tab(): model_file, index_file, audio, - embedder_model_custom, - embedder_model_custom_batch, ], ) audio.change( @@ -740,43 +1941,76 @@ def inference_tab(): inputs=[embedder_model], outputs=[embedder_custom], ) - embedder_upload_custom.upload( - fn=save_drop_custom_embedder, - inputs=[embedder_upload_custom], - outputs=[embedder_upload_custom], - ) - embedder_custom_refresh.click( - fn=change_choices, - inputs=[], - outputs=[ - model_file, - index_file, - audio, - embedder_model_custom, - embedder_model_custom_batch, - ], - ) embedder_model_batch.change( fn=toggle_visible_embedder_custom, inputs=[embedder_model_batch], outputs=[embedder_custom_batch], ) - embedder_upload_custom_batch.upload( - fn=save_drop_custom_embedder, - inputs=[embedder_upload_custom_batch], - outputs=[embedder_upload_custom_batch], + move_files_button.click( + fn=create_folder_and_move_files, + inputs=[folder_name_input, bin_file_upload, config_file_upload], + outputs=[], ) - embedder_custom_refresh_batch.click( - fn=change_choices, + refresh_embedders_button.click( + fn=lambda: gr.update(choices=refresh_embedders_folders()), inputs=[], - outputs=[ - model_file, - index_file, - audio, - embedder_model_custom, - embedder_model_custom_batch, + outputs=[embedder_model_custom], + ) + move_files_button_batch.click( + fn=create_folder_and_move_files, + inputs=[ + folder_name_input_batch, + bin_file_upload_batch, + config_file_upload_batch, ], + outputs=[], ) + refresh_embedders_button_batch.click( + fn=lambda: gr.update(choices=refresh_embedders_folders()), + inputs=[], + outputs=[embedder_model_custom_batch], + ) + # Sliders variables + reverb_sliders = [ + reverb_room_size, + 
reverb_damping, + reverb_wet_gain, + reverb_dry_gain, + reverb_width, + reverb_freeze_mode, + ] + pitch_shift_sliders = [pitch_shift_semitones] + limiter_sliders = [limiter_threshold, limiter_release_time] + gain_sliders = [gain_db] + distortion_sliders = [distortion_gain] + chorus_sliders = [ + chorus_rate, + chorus_depth, + chorus_center_delay, + chorus_feedback, + chorus_mix, + ] + bitcrush_sliders = [bitcrush_bit_depth] + clipping_sliders = [clipping_threshold] + compressor_sliders = [ + compressor_threshold, + compressor_ratio, + compressor_attack, + compressor_release, + ] + delay_sliders = [delay_seconds, delay_feedback, delay_mix] + sliders = [ + *reverb_sliders, + *pitch_shift_sliders, + *limiter_sliders, + *gain_sliders, + *distortion_sliders, + *chorus_sliders, + *bitcrush_sliders, + *clipping_sliders, + *compressor_sliders, + *delay_sliders, + ] convert_button1.click( fn=run_infer_script, inputs=[ @@ -800,9 +2034,65 @@ def inference_tab(): f0_file, embedder_model, embedder_model_custom, + formant_shifting, + formant_qfrency, + formant_timbre, + post_process, + reverb, + pitch_shift, + limiter, + gain, + distortion, + chorus, + bitcrush, + clipping, + compressor, + delay, + *sliders, ], outputs=[vc_output1, vc_output2], ) + # Batch sliders variables + reverb_sliders_batch = [ + reverb_room_size_batch, + reverb_damping_batch, + reverb_wet_gain_batch, + reverb_dry_gain_batch, + reverb_width_batch, + reverb_freeze_mode_batch, + ] + pitch_shift_sliders_batch = [pitch_shift_semitones_batch] + limiter_sliders_batch = [limiter_threshold_batch, limiter_release_time_batch] + gain_sliders_batch = [gain_db_batch] + distortion_sliders_batch = [distortion_gain_batch] + chorus_sliders_batch = [ + chorus_rate_batch, + chorus_depth_batch, + chorus_center_delay_batch, + chorus_feedback_batch, + chorus_mix_batch, + ] + bitcrush_sliders_batch = [bitcrush_bit_depth_batch] + clipping_sliders_batch = [clipping_threshold_batch] + compressor_sliders_batch = [ + 
compressor_threshold_batch, + compressor_ratio_batch, + compressor_attack_batch, + compressor_release_batch, + ] + delay_sliders_batch = [delay_seconds_batch, delay_feedback_batch, delay_mix_batch] + sliders_batch = [ + *reverb_sliders_batch, + *pitch_shift_sliders_batch, + *limiter_sliders_batch, + *gain_sliders_batch, + *distortion_sliders_batch, + *chorus_sliders_batch, + *bitcrush_sliders_batch, + *clipping_sliders_batch, + *compressor_sliders_batch, + *delay_sliders_batch, + ] convert_button2.click( fn=run_batch_infer_script, inputs=[ @@ -826,6 +2116,31 @@ def inference_tab(): f0_file_batch, embedder_model_batch, embedder_model_custom_batch, + formant_shifting_batch, + formant_qfrency_batch, + formant_timbre_batch, + post_process_batch, + reverb_batch, + pitch_shift_batch, + limiter_batch, + gain_batch, + distortion_batch, + chorus_batch, + bitcrush_batch, + clipping_batch, + compressor_batch, + delay_batch, + *sliders_batch, ], outputs=[vc_output3], ) + convert_button2.click( + fn=enable_stop_convert_button, + inputs=[], + outputs=[convert_button2, stop_button], + ) + stop_button.click( + fn=disable_stop_convert_button, + inputs=[], + outputs=[convert_button2, stop_button], + ) diff --git a/tabs/settings/lang.py b/tabs/settings/lang.py index 9971c7a7f7732a00a7a87a74b242592102848d32..3dc8d7b8b4f2423a709d53f2ef90ebe1577f08b4 100644 --- a/tabs/settings/lang.py +++ b/tabs/settings/lang.py @@ -28,7 +28,7 @@ def save_lang_settings(selected_language): if selected_language == "Language automatically detected in the system": config["lang"]["override"] = False else: - config["lang"]["override"] = False + config["lang"]["override"] = True config["lang"]["selected_lang"] = selected_language gr.Info("Language have been saved. Restart Applio to apply the changes.") @@ -45,7 +45,8 @@ def lang_tab(): "Select the language you want to use. 
(Requires restarting Applio)" ), value=get_language_settings(), - choices=["Language automatically detected in the system"], + choices=["Language automatically detected in the system"] + + i18n._get_available_languages(), interactive=True, ) diff --git a/tabs/settings/restart.py b/tabs/settings/restart.py index 0ac3255443e05871b159007cd15a8d703c29e795..50c3bce5850fe9dac6c11255f58cfb2f1f77951f 100644 --- a/tabs/settings/restart.py +++ b/tabs/settings/restart.py @@ -1,16 +1,28 @@ import gradio as gr import os import sys +import json now_dir = os.getcwd() -pid_file_path = os.path.join(now_dir, "rvc", "train", "train_pid.txt") -def restart_applio(): - if os.name != "nt": - os.system("clear") - else: - os.system("cls") +def stop_train(model_name: str): + pid_file_path = os.path.join(now_dir, "logs", model_name, "config.json") + try: + with open(pid_file_path, "r") as pid_file: + pid_data = json.load(pid_file) + pids = pid_data.get("process_pids", []) + with open(pid_file_path, "w") as pid_file: + pid_data.pop("process_pids", None) + json.dump(pid_data, pid_file, indent=4) + for pid in pids: + os.kill(pid, 9) + except: + pass + + +def stop_infer(): + pid_file_path = os.path.join(now_dir, "assets", "infer_pid.txt") try: with open(pid_file_path, "r") as pid_file: pids = [int(pid) for pid in pid_file.readlines()] @@ -19,6 +31,13 @@ def restart_applio(): os.remove(pid_file_path) except: pass + + +def restart_applio(): + if os.name != "nt": + os.system("clear") + else: + os.system("cls") python = sys.executable os.execl(python, python, *sys.argv) @@ -32,4 +51,8 @@ def restart_tab(): with gr.Row(): with gr.Column(): restart_button = gr.Button(i18n("Restart Applio")) - restart_button.click() + restart_button.click( + fn=restart_applio, + inputs=[], + outputs=[], + ) diff --git a/tabs/train/train.py b/tabs/train/train.py index 7606a608292f0f26a615d8f2634f796a46863096..bd594b7d892d85528448e46adbf02757c10a4036 100644 --- a/tabs/train/train.py +++ b/tabs/train/train.py @@ -1,19 
+1,21 @@ import os -from multiprocessing import cpu_count -import sys import shutil +import sys +from multiprocessing import cpu_count + import gradio as gr + from assets.i18n.i18n import I18nAuto from core import ( - run_preprocess_script, run_extract_script, - run_train_script, run_index_script, + run_preprocess_script, run_prerequisites_script, + run_train_script, ) -from rvc.configs.config import max_vram_gpu, get_gpu_info +from rvc.configs.config import get_gpu_info, get_number_of_gpus, max_vram_gpu from rvc.lib.utils import format_title -from tabs.settings.restart import restart_applio +from tabs.settings.restart import stop_train i18n = I18nAuto() now_dir = os.getcwd() @@ -61,7 +63,7 @@ sup_audioext = { # Custom Pretraineds pretraineds_custom_path = os.path.join( - now_dir, "rvc", "pretraineds", "pretraineds_custom" + now_dir, "rvc", "models", "pretraineds", "pretraineds_custom" ) pretraineds_custom_path_relative = os.path.relpath(pretraineds_custom_path, now_dir) @@ -141,13 +143,12 @@ def refresh_models_and_datasets(): ) -# Refresh Custom Pretraineds +# Refresh Custom Embedders def get_embedder_custom_list(): return [ - os.path.join(dirpath, filename) - for dirpath, _, filenames in os.walk(custom_embedder_root_relative) - for filename in filenames - if filename.endswith(".pt") + os.path.join(dirpath, dirname) + for dirpath, dirnames, _ in os.walk(custom_embedder_root_relative) + for dirname in dirnames ] @@ -168,7 +169,7 @@ def save_drop_model(dropbox): pretrained_path = os.path.join(pretraineds_custom_path_relative, file_name) if os.path.exists(pretrained_path): os.remove(pretrained_path) - os.rename(dropbox, pretrained_path) + shutil.copy(dropbox, pretrained_path) gr.Info( i18n( "Click the refresh button to see the pretrained file in the dropdown menu." 
@@ -195,7 +196,7 @@ def save_drop_dataset_audio(dropbox, dataset_name): destination_path = os.path.join(dataset_path, audio_file) if os.path.exists(destination_path): os.remove(destination_path) - os.rename(dropbox, destination_path) + shutil.copy(dropbox, destination_path) gr.Info( i18n( "The audio file has been successfully added to the dataset. Please click the preprocess button." @@ -208,23 +209,31 @@ def save_drop_dataset_audio(dropbox, dataset_name): # Drop Custom Embedder -def save_drop_custom_embedder(dropbox): - if ".pt" not in dropbox: - gr.Info( - i18n("The file you dropped is not a valid embedder file. Please try again.") - ) - else: - file_name = os.path.basename(dropbox) - custom_embedder_path = os.path.join(custom_embedder_root, file_name) - if os.path.exists(custom_embedder_path): - os.remove(custom_embedder_path) - os.rename(dropbox, custom_embedder_path) - gr.Info( - i18n( - "Click the refresh button to see the embedder file in the dropdown menu." - ) - ) - return None +def create_folder_and_move_files(folder_name, bin_file, config_file): + if not folder_name: + return "Folder name must not be empty." 
+ + folder_name = os.path.join(custom_embedder_root, folder_name) + os.makedirs(folder_name, exist_ok=True) + + if bin_file: + bin_file_path = os.path.join(folder_name, os.path.basename(bin_file)) + shutil.copy(bin_file, bin_file_path) + + if config_file: + config_file_path = os.path.join(folder_name, os.path.basename(config_file)) + shutil.copy(config_file, config_file_path) + + return f"Files moved to folder {folder_name}" + + +def refresh_embedders_folders(): + custom_embedders = [ + os.path.join(dirpath, dirname) + for dirpath, dirnames, _ in os.walk(custom_embedder_root_relative) + for dirname in dirnames + ] + return custom_embedders # Export @@ -293,76 +302,90 @@ def upload_to_google_drive(pth_path, index_path): # Train Tab def train_tab(): + with gr.Row(): + model_name = gr.Dropdown( + label=i18n("Model Name"), + info=i18n("Name of the new model."), + choices=get_models_list(), + value="my-project", + interactive=True, + allow_custom_value=True, + ) + sampling_rate = gr.Radio( + label=i18n("Sampling Rate"), + info=i18n("The sampling rate of the audio files."), + choices=["32000", "40000", "48000"], + value="40000", + interactive=True, + ) + rvc_version = gr.Radio( + label=i18n("Model Architecture"), + info=i18n("Version of the model architecture."), + choices=["v1", "v2"], + value="v2", + interactive=True, + ) with gr.Accordion(i18n("Preprocess")): - with gr.Row(): - with gr.Column(): - model_name = gr.Dropdown( - label=i18n("Model Name"), - info=i18n("Name of the new model."), - choices=get_models_list(), - value="my-project", - interactive=True, - allow_custom_value=True, - ) - dataset_path = gr.Dropdown( - label=i18n("Dataset Path"), - info=i18n("Path to the dataset folder."), - # placeholder=i18n("Enter dataset path"), - choices=get_datasets_list(), - allow_custom_value=True, + dataset_path = gr.Dropdown( + label=i18n("Dataset Path"), + info=i18n("Path to the dataset folder."), + # placeholder=i18n("Enter dataset path"), + choices=get_datasets_list(), + 
allow_custom_value=True, + interactive=True, + ) + dataset_creator = gr.Checkbox( + label=i18n("Dataset Creator"), + value=False, + interactive=True, + visible=True, + ) + with gr.Column(visible=False) as dataset_creator_settings: + with gr.Accordion(i18n("Dataset Creator")): + dataset_name = gr.Textbox( + label=i18n("Dataset Name"), + info=i18n("Name of the new dataset."), + placeholder=i18n("Enter dataset name"), interactive=True, ) - refresh = gr.Button(i18n("Refresh")) - dataset_creator = gr.Checkbox( - label=i18n("Dataset Creator"), - value=False, - interactive=True, - visible=True, - ) - - with gr.Column(visible=False) as dataset_creator_settings: - with gr.Accordion(i18n("Dataset Creator")): - dataset_name = gr.Textbox( - label=i18n("Dataset Name"), - info=i18n("Name of the new dataset."), - placeholder=i18n("Enter dataset name"), - interactive=True, - ) - upload_audio_dataset = gr.File( - label=i18n("Upload Audio Dataset"), - type="filepath", - interactive=True, - ) - - with gr.Column(): - sampling_rate = gr.Radio( - label=i18n("Sampling Rate"), - info=i18n("The sampling rate of the audio files."), - choices=["32000", "40000", "48000"], - value="40000", + upload_audio_dataset = gr.File( + label=i18n("Upload Audio Dataset"), + type="filepath", interactive=True, ) + refresh = gr.Button(i18n("Refresh")) - rvc_version = gr.Radio( - label=i18n("RVC Version"), - info=i18n("The RVC version of the model."), - choices=["v1", "v2"], - value="v2", + with gr.Accordion(i18n("Advanced Settings"), open=False): + cpu_cores_preprocess = gr.Slider( + 1, + 64, + cpu_count(), + step=1, + label=i18n("CPU Cores"), + info=i18n( + "The number of CPU cores to use in the preprocess. The default setting are your cpu cores, which is recommended for most cases." + ), + interactive=True, + ) + with gr.Row(): + cut_preprocess = gr.Checkbox( + label=i18n("Audio cutting"), + info=i18n( + "It's recommended to deactivate this option if your dataset has already been processed." 
+ ), + value=True, interactive=True, + visible=True, ) - - cpu_cores_preprocess = gr.Slider( - 1, - 64, - cpu_count(), - step=1, - label=i18n("CPU Cores"), + process_effects = gr.Checkbox( + label=i18n("Process effects"), info=i18n( - "The number of CPU cores to utilize. The default setting are your cpu cores, which is recommended for most cases." + "It's recommended to deactivate this option if your dataset has already been processed." ), + value=True, interactive=True, + visible=True, ) - preprocess_output_info = gr.Textbox( label=i18n("Output Information"), info=i18n("The output information will be displayed here."), @@ -375,7 +398,14 @@ def train_tab(): preprocess_button = gr.Button(i18n("Preprocess Dataset")) preprocess_button.click( fn=run_preprocess_script, - inputs=[model_name, dataset_path, sampling_rate, cpu_cores_preprocess], + inputs=[ + model_name, + dataset_path, + sampling_rate, + cpu_cores_preprocess, + cut_preprocess, + process_effects, + ], outputs=[preprocess_output_info], api_name="preprocess_dataset", ) @@ -397,13 +427,15 @@ def train_tab(): info=i18n("Model used for learning speaker embedding."), choices=[ "contentvec", + "chinese-hubert-base", "japanese-hubert-base", - "chinese-hubert-large", + "korean-hubert-base", "custom", ], value="contentvec", interactive=True, ) + hop_length = gr.Slider( 1, 512, @@ -416,6 +448,25 @@ def train_tab(): visible=False, interactive=True, ) + with gr.Row(visible=False) as embedder_custom: + with gr.Accordion("Custom Embedder", open=True): + with gr.Row(): + embedder_model_custom = gr.Dropdown( + label="Select Custom Embedder", + choices=refresh_embedders_folders(), + interactive=True, + allow_custom_value=True, + ) + refresh_embedders_button = gr.Button("Refresh embedders") + folder_name_input = gr.Textbox(label="Folder Name", interactive=True) + with gr.Row(): + bin_file_upload = gr.File( + label="Upload .bin", type="filepath", interactive=True + ) + config_file_upload = gr.File( + label="Upload .json", 
type="filepath", interactive=True + ) + move_files_button = gr.Button("Move files to custom embedder folder") pitch_guidance_extract = gr.Checkbox( label=i18n("Pitch Guidance"), info=i18n( @@ -440,7 +491,7 @@ def train_tab(): step=1, label=i18n("CPU Cores"), info=i18n( - "The number of CPU cores to use in the index extraction process. The default setting are your cpu cores, which is recommended for most cases." + "The number of CPU cores to use in the extraction process. The default setting are your cpu cores, which is recommended for most cases." ), interactive=True, ) @@ -449,10 +500,10 @@ def train_tab(): gpu_extract = gr.Textbox( label=i18n("GPU Number"), info=i18n( - "Specify the number of GPUs you wish to utilize for training by entering them separated by hyphens (-)." + "Specify the number of GPUs you wish to utilize for extracting by entering them separated by hyphens (-)." ), placeholder=i18n("0 to ∞ separated by -"), - value="0", + value=str(get_number_of_gpus()), interactive=True, ) gr.Textbox( @@ -462,24 +513,6 @@ def train_tab(): interactive=False, ) - with gr.Column(visible=False) as embedder_custom: - with gr.Accordion(i18n("Custom Embedder"), open=True): - embedder_upload_custom = gr.File( - label=i18n("Upload Custom Embedder"), - type="filepath", - interactive=True, - ) - embedder_custom_refresh = gr.Button(i18n("Refresh")) - embedder_model_custom = gr.Dropdown( - label=i18n("Custom Embedder"), - info=i18n( - "Select the custom embedder to use for the conversion." - ), - choices=sorted(get_embedder_custom_list()), - interactive=True, - allow_custom_value=True, - ) - extract_output_info = gr.Textbox( label=i18n("Output Information"), info=i18n("The output information will be displayed here."), @@ -646,7 +679,7 @@ def train_tab(): "Specify the number of GPUs you wish to utilize for training by entering them separated by hyphens (-)." 
), placeholder=i18n("0 to ∞ separated by -"), - value="0", + value=str(get_number_of_gpus()), interactive=True, ) gr.Textbox( @@ -676,6 +709,15 @@ def train_tab(): ), interactive=True, ) + index_algorithm = gr.Radio( + label=i18n("Index Algorithm"), + info=i18n( + "KMeans is a clustering algorithm that divides the dataset into K clusters. This setting is particularly useful for large datasets." + ), + choices=["Auto", "Faiss", "KMeans"], + value="Auto", + interactive=True, + ) with gr.Row(): train_output_info = gr.Textbox( @@ -705,6 +747,7 @@ def train_tab(): overtraining_threshold, pretrained, sync_graph, + index_algorithm, cache_dataset_in_gpu, custom_pretrained, g_pretrained_path, @@ -714,19 +757,17 @@ def train_tab(): api_name="start_training", ) - stop_train_button = gr.Button( - i18n("Stop Training & Restart Applio"), visible=False - ) + stop_train_button = gr.Button(i18n("Stop Training"), visible=False) stop_train_button.click( - fn=restart_applio, - inputs=[], + fn=stop_train, + inputs=[model_name], outputs=[], ) index_button = gr.Button(i18n("Generate Index")) index_button.click( fn=run_index_script, - inputs=[model_name, rvc_version], + inputs=[model_name, rvc_version, index_algorithm], outputs=[train_output_info], api_name="generate_index", ) @@ -869,17 +910,19 @@ def train_tab(): inputs=[embedder_model], outputs=[embedder_custom], ) - embedder_upload_custom.upload( - fn=save_drop_custom_embedder, - inputs=[embedder_upload_custom], - outputs=[embedder_upload_custom], + embedder_model.change( + fn=toggle_visible_embedder_custom, + inputs=[embedder_model], + outputs=[embedder_custom], ) - embedder_custom_refresh.click( - fn=refresh_custom_embedder_list, - inputs=[], - outputs=[embedder_model_custom], + move_files_button.click( + fn=create_folder_and_move_files, + inputs=[folder_name_input, bin_file_upload, config_file_upload], + outputs=[], + ) + refresh_embedders_button.click( + fn=refresh_embedders_folders, inputs=[], outputs=[embedder_model_custom] ) - 
pretrained.change( fn=toggle_pretrained, inputs=[pretrained, custom_pretrained], @@ -944,4 +987,4 @@ def train_tab(): fn=refresh_pth_and_index_list, inputs=[], outputs=[pth_dropdown_export, index_dropdown_export], - ) \ No newline at end of file + ) diff --git a/tabs/tts/tts.py b/tabs/tts/tts.py index 63e4e5944588346312272e1acb933e90fa681a3e..e1292513c8623736c2c3d3324695f6acfb3631dc 100644 --- a/tabs/tts/tts.py +++ b/tabs/tts/tts.py @@ -3,6 +3,7 @@ import gradio as gr import regex as re import json import random +import shutil from core import ( run_tts_script, @@ -127,7 +128,7 @@ def save_drop_custom_embedder(dropbox): custom_embedder_path = os.path.join(custom_embedder_root, file_name) if os.path.exists(custom_embedder_path): os.remove(custom_embedder_path) - os.rename(dropbox, custom_embedder_path) + shutil.copy(dropbox, custom_embedder_path) gr.Info( i18n( "Click the refresh button to see the embedder file in the dropdown menu." @@ -364,8 +365,9 @@ def tts_tab(): info=i18n("Model used for learning speaker embedding."), choices=[ "contentvec", + "chinese-hubert-base", "japanese-hubert-base", - "chinese-hubert-large", + "korean-hubert-base", "custom", ], value="contentvec",