seamless-m4t-v2-large-fixing

Sleeping

App Files Files Community

Vaibhav Srivastav committed on Nov 30, 2023

Commit

5d906de

•

0 Parent(s):

Squash for release.

Browse files

Files changed (13) hide show

.gitattributes +37 -0
.gitignore +162 -0
.pre-commit-config.yaml +55 -0
.vscode/settings.json +21 -0
Dockerfile +61 -0
README.md +14 -0
app.py +417 -0
assets/sample_input.mp3 +3 -0
assets/sample_input_2.mp3 +3 -0
lang_list.py +255 -0
requirements.txt +4 -0
style.css +10 -0
whl/seamless_communication-1.0.0-py3-none-any.whl +3 -0

.gitattributes ADDED Viewed

	@@ -0,0 +1,37 @@

+*.7z filter=lfs diff=lfs merge=lfs -text
+*.arrow filter=lfs diff=lfs merge=lfs -text
+*.bin filter=lfs diff=lfs merge=lfs -text
+*.bz2 filter=lfs diff=lfs merge=lfs -text
+*.ckpt filter=lfs diff=lfs merge=lfs -text
+*.ftz filter=lfs diff=lfs merge=lfs -text
+*.gz filter=lfs diff=lfs merge=lfs -text
+*.h5 filter=lfs diff=lfs merge=lfs -text
+*.joblib filter=lfs diff=lfs merge=lfs -text
+*.lfs.* filter=lfs diff=lfs merge=lfs -text
+*.mlmodel filter=lfs diff=lfs merge=lfs -text
+*.model filter=lfs diff=lfs merge=lfs -text
+*.msgpack filter=lfs diff=lfs merge=lfs -text
+*.npy filter=lfs diff=lfs merge=lfs -text
+*.npz filter=lfs diff=lfs merge=lfs -text
+*.onnx filter=lfs diff=lfs merge=lfs -text
+*.ot filter=lfs diff=lfs merge=lfs -text
+*.parquet filter=lfs diff=lfs merge=lfs -text
+*.pb filter=lfs diff=lfs merge=lfs -text
+*.pickle filter=lfs diff=lfs merge=lfs -text
+*.pkl filter=lfs diff=lfs merge=lfs -text
+*.pt filter=lfs diff=lfs merge=lfs -text
+*.pth filter=lfs diff=lfs merge=lfs -text
+*.rar filter=lfs diff=lfs merge=lfs -text
+*.safetensors filter=lfs diff=lfs merge=lfs -text
+saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+*.tar.* filter=lfs diff=lfs merge=lfs -text
+*.tar filter=lfs diff=lfs merge=lfs -text
+*.tflite filter=lfs diff=lfs merge=lfs -text
+*.tgz filter=lfs diff=lfs merge=lfs -text
+*.wasm filter=lfs diff=lfs merge=lfs -text
+*.xz filter=lfs diff=lfs merge=lfs -text
+*.zip filter=lfs diff=lfs merge=lfs -text
+*.zst filter=lfs diff=lfs merge=lfs -text
+*tfevents* filter=lfs diff=lfs merge=lfs -text
+*.mp3 filter=lfs diff=lfs merge=lfs -text
+*.whl filter=lfs diff=lfs merge=lfs -text

.gitignore ADDED Viewed

	@@ -0,0 +1,162 @@

+gradio_cached_examples/
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+# C extensions
+*.so
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+cover/
+# Translations
+*.mo
+*.pot
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+# Flask stuff:
+instance/
+.webassets-cache
+# Scrapy stuff:
+.scrapy
+# Sphinx documentation
+docs/_build/
+# PyBuilder
+.pybuilder/
+target/
+# Jupyter Notebook
+.ipynb_checkpoints
+# IPython
+profile_default/
+ipython_config.py
+# pyenv
+#   For a library or package, you might want to ignore these files since the code is
+#   intended to run in multiple environments; otherwise, check them in:
+# .python-version
+# pipenv
+#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+#   However, in case of collaboration, if having platform-specific dependencies or dependencies
+#   having no cross-platform support, pipenv may install dependencies that don't work, or not
+#   install all needed dependencies.
+#Pipfile.lock
+# poetry
+#   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+#   This is especially recommended for binary packages to ensure reproducibility, and is more
+#   commonly ignored for libraries.
+#   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+#poetry.lock
+# pdm
+#   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+#pdm.lock
+#   pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
+#   in version control.
+#   https://pdm.fming.dev/#use-with-ide
+.pdm.toml
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+__pypackages__/
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+# SageMath parsed files
+*.sage.py
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+# Spyder project settings
+.spyderproject
+.spyproject
+# Rope project settings
+.ropeproject
+# mkdocs documentation
+/site
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+# Pyre type checker
+.pyre/
+# pytype static type analyzer
+.pytype/
+# Cython debug symbols
+cython_debug/
+# PyCharm
+#  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+#  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+#  and can be added to the global gitignore or merged into this file.  For a more nuclear
+#  option (not recommended) you can uncomment the following to ignore the entire idea folder.
+#.idea/

.pre-commit-config.yaml ADDED Viewed

	@@ -0,0 +1,55 @@

+repos:
+  - repo: https://github.com/pre-commit/pre-commit-hooks
+    rev: v4.5.0
+    hooks:
+      - id: check-executables-have-shebangs
+      - id: check-json
+      - id: check-merge-conflict
+      - id: check-shebang-scripts-are-executable
+      - id: check-toml
+      - id: check-yaml
+      - id: end-of-file-fixer
+      - id: mixed-line-ending
+        args: ["--fix=lf"]
+      - id: requirements-txt-fixer
+      - id: trailing-whitespace
+  - repo: https://github.com/myint/docformatter
+    rev: v1.7.5
+    hooks:
+      - id: docformatter
+        args: ["--in-place"]
+  - repo: https://github.com/pycqa/isort
+    rev: 5.12.0
+    hooks:
+      - id: isort
+        args: ["--profile", "black"]
+  - repo: https://github.com/pre-commit/mirrors-mypy
+    rev: v1.7.0
+    hooks:
+      - id: mypy
+        args: ["--ignore-missing-imports"]
+        additional_dependencies:
+          ["types-python-slugify", "types-requests", "types-PyYAML"]
+  - repo: https://github.com/psf/black
+    rev: 23.11.0
+    hooks:
+      - id: black
+        language_version: python3.10
+        args: ["--line-length", "119"]
+  - repo: https://github.com/kynan/nbstripout
+    rev: 0.6.1
+    hooks:
+      - id: nbstripout
+        args:
+          [
+            "--extra-keys",
+            "metadata.interpreter metadata.kernelspec cell.metadata.pycharm",
+          ]
+  - repo: https://github.com/nbQA-dev/nbQA
+    rev: 1.7.0
+    hooks:
+      - id: nbqa-black
+      - id: nbqa-pyupgrade
+        args: ["--py37-plus"]
+      - id: nbqa-isort
+        args: ["--float-to-top"]

.vscode/settings.json ADDED Viewed

	@@ -0,0 +1,21 @@

+{
+    "[python]": {
+        "editor.defaultFormatter": "ms-python.black-formatter",
+        "editor.formatOnType": true,
+        "editor.codeActionsOnSave": {
+            "source.organizeImports": true
+        }
+    },
+    "black-formatter.args": [
+        "--line-length=119"
+    ],
+    "isort.args": ["--profile", "black"],
+    "flake8.args": [
+        "--max-line-length=119"
+    ],
+    "ruff.args": [
+        "--line-length=119"
+    ],
+    "editor.formatOnSave": true,
+    "files.insertFinalNewline": true
+}

Dockerfile ADDED Viewed

	@@ -0,0 +1,61 @@

+FROM nvidia/cuda:12.1.1-cudnn8-devel-ubuntu22.04
+ENV DEBIAN_FRONTEND=noninteractive
+RUN apt-get update && \
+    apt-get upgrade -y && \
+    apt-get install -y --no-install-recommends \
+    git \
+    git-lfs \
+    wget \
+    curl \
+    # python build dependencies \
+    build-essential \
+    libssl-dev \
+    zlib1g-dev \
+    libbz2-dev \
+    libreadline-dev \
+    libsqlite3-dev \
+    libncursesw5-dev \
+    xz-utils \
+    tk-dev \
+    libxml2-dev \
+    libxmlsec1-dev \
+    libffi-dev \
+    liblzma-dev \
+    # gradio dependencies \
+    ffmpeg \
+    # fairseq2 dependencies \
+    libsndfile-dev && \
+    apt-get clean && \
+    rm -rf /var/lib/apt/lists/*
+RUN useradd -m -u 1000 user
+USER user
+ENV HOME=/home/user \
+    PATH=/home/user/.local/bin:${PATH}
+WORKDIR ${HOME}/app
+RUN curl https://pyenv.run | bash
+ENV PATH=${HOME}/.pyenv/shims:${HOME}/.pyenv/bin:${PATH}
+ARG PYTHON_VERSION=3.10.13
+RUN pyenv install ${PYTHON_VERSION} && \
+    pyenv global ${PYTHON_VERSION} && \
+    pyenv rehash && \
+    pip install --no-cache-dir -U pip setuptools wheel && \
+    pip install "huggingface-hub==0.19.3" "hf-transfer==0.1.4"
+COPY --chown=1000 . ${HOME}/app
+RUN pip install -r ${HOME}/app/requirements.txt && \
+    pip install fairseq2 --pre --extra-index-url https://fair.pkg.atmeta.com/fairseq2/pt2.1.0/cu121 && \
+    pip install ${HOME}/app/whl/seamless_communication-1.0.0-py3-none-any.whl
+ENV PYTHONPATH=${HOME}/app \
+    PYTHONUNBUFFERED=1 \
+    HF_HUB_ENABLE_HF_TRANSFER=1 \
+    GRADIO_ALLOW_FLAGGING=never \
+    GRADIO_NUM_PORTS=1 \
+    GRADIO_SERVER_NAME=0.0.0.0 \
+    GRADIO_THEME=huggingface \
+    TQDM_POSITION=-1 \
+    TQDM_MININTERVAL=1 \
+    SYSTEM=spaces
+CMD ["python", "app.py"]

README.md ADDED Viewed

	@@ -0,0 +1,14 @@

+---
+title: Seamless M4T v2
+emoji: 📞
+colorFrom: blue
+colorTo: yellow
+sdk: docker
+pinned: false
+suggested_hardware: t4-medium
+models:
+  - facebook/seamless-m4t-v2-large
+  - facebook/SONAR
+---
+Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

app.py ADDED Viewed

	@@ -0,0 +1,417 @@

+from __future__ import annotations
+import os
+import pathlib
+import gradio as gr
+import numpy as np
+import torch
+import torchaudio
+from fairseq2.assets import InProcAssetMetadataProvider, asset_store
+from huggingface_hub import snapshot_download
+from seamless_communication.inference import Translator
+from lang_list import (
+    ASR_TARGET_LANGUAGE_NAMES,
+    LANGUAGE_NAME_TO_CODE,
+    S2ST_TARGET_LANGUAGE_NAMES,
+    S2TT_TARGET_LANGUAGE_NAMES,
+    T2ST_TARGET_LANGUAGE_NAMES,
+    T2TT_TARGET_LANGUAGE_NAMES,
+    TEXT_SOURCE_LANGUAGE_NAMES,
+)
+# Directory holding the model checkpoints; overridable through the CHECKPOINTS_PATH env var.
+CHECKPOINTS_PATH = pathlib.Path(os.getenv("CHECKPOINTS_PATH", "/home/user/app/models"))
+# Download the checkpoint snapshot from the Hugging Face Hub only when the directory is missing.
+if not CHECKPOINTS_PATH.exists():
+    snapshot_download(repo_id="meta-private/M4Tv2", repo_type="model", local_dir=CHECKPOINTS_PATH)
+# Replace every fairseq2 environment resolver with one that always returns "demo".
+# NOTE(review): presumably this makes the "@demo" asset entries below take effect — confirm
+# against the fairseq2 asset-store documentation.
+asset_store.env_resolvers.clear()
+asset_store.env_resolvers.append(lambda: "demo")
+# Asset metadata pointing the model and vocoder names at the local checkpoint files.
+demo_metadata = [
+    {
+        "name": "seamlessM4T_v2_large@demo",
+        "checkpoint": f"file://{CHECKPOINTS_PATH}/seamlessM4T_v2_large.pt",
+        "char_tokenizer": f"file://{CHECKPOINTS_PATH}/spm_char_lang38_tc.model",
+    },
+    {
+        "name": "vocoder_v2@demo",
+        "checkpoint": f"file://{CHECKPOINTS_PATH}/vocoder_v2.pt",
+    },
+]
+# Register the in-process metadata with the global asset store.
+asset_store.metadata_providers.append(InProcAssetMetadataProvider(demo_metadata))
+DESCRIPTION = """\
+# SeamlessM4T
+[SeamlessM4T](https://github.com/facebookresearch/seamless_communication) is designed to provide high-quality
+translation, allowing people from different linguistic communities to communicate effortlessly through speech and text.
+This unified model enables multiple tasks like Speech-to-Speech (S2ST), Speech-to-Text (S2TT), Text-to-Speech (T2ST)
+translation and more, without relying on multiple separate models.
+"""
+# Example caching is on only when CACHE_EXAMPLES=1 is set AND a CUDA device is available.
+CACHE_EXAMPLES = os.getenv("CACHE_EXAMPLES") == "1" and torch.cuda.is_available()
+# Sample rate used when resampling inputs and reported for output audio.
+# Stored as a float; call sites cast it to int where an integer is needed.
+AUDIO_SAMPLE_RATE = 16000.0
+MAX_INPUT_AUDIO_LENGTH = 60  # in seconds
+# Initial value of the "Target language" dropdown in every tab.
+DEFAULT_TARGET_LANGUAGE = "French"
+# Half precision on the first CUDA device when available, otherwise float32 on CPU.
+if torch.cuda.is_available():
+    device = torch.device("cuda:0")
+    dtype = torch.float16
+else:
+    device = torch.device("cpu")
+    dtype = torch.float32
+# Single Translator instance shared by all run_* task handlers in this file.
+translator = Translator(
+    model_name_or_card="seamlessM4T_v2_large",
+    vocoder_name_or_card="vocoder_v2",
+    device=device,
+    dtype=dtype,
+    apply_mintox=True,
+)
+def preprocess_audio(input_audio: str) -> None:
+    arr, org_sr = torchaudio.load(input_audio)
+    new_arr = torchaudio.functional.resample(arr, orig_freq=org_sr, new_freq=AUDIO_SAMPLE_RATE)
+    max_length = int(MAX_INPUT_AUDIO_LENGTH * AUDIO_SAMPLE_RATE)
+    if new_arr.shape[1] > max_length:
+        new_arr = new_arr[:, :max_length]
+        gr.Warning(f"Input audio is too long. Only the first {MAX_INPUT_AUDIO_LENGTH} seconds is used.")
+    torchaudio.save(input_audio, new_arr, sample_rate=int(AUDIO_SAMPLE_RATE))
+def run_s2st(
+    input_audio: str, source_language: str, target_language: str
+) -> tuple[tuple[int, np.ndarray] | None, str]:
+    preprocess_audio(input_audio)
+    source_language_code = LANGUAGE_NAME_TO_CODE[source_language]
+    target_language_code = LANGUAGE_NAME_TO_CODE[target_language]
+    out_texts, out_audios = translator.predict(
+        input=input_audio,
+        task_str="S2ST",
+        src_lang=source_language_code,
+        tgt_lang=target_language_code,
+    )
+    out_text = str(out_texts[0])
+    out_wav = out_audios.audio_wavs[0].cpu().detach().numpy()
+    return (int(AUDIO_SAMPLE_RATE), out_wav), out_text
+def run_s2tt(input_audio: str, source_language: str, target_language: str) -> str:
+    preprocess_audio(input_audio)
+    source_language_code = LANGUAGE_NAME_TO_CODE[source_language]
+    target_language_code = LANGUAGE_NAME_TO_CODE[target_language]
+    out_texts, _ = translator.predict(
+        input=input_audio,
+        task_str="S2TT",
+        src_lang=source_language_code,
+        tgt_lang=target_language_code,
+    )
+    return str(out_texts[0])
+def run_t2st(input_text: str, source_language: str, target_language: str) -> tuple[tuple[int, np.ndarray] | None, str]:
+    source_language_code = LANGUAGE_NAME_TO_CODE[source_language]
+    target_language_code = LANGUAGE_NAME_TO_CODE[target_language]
+    out_texts, out_audios = translator.predict(
+        input=input_text,
+        task_str="T2ST",
+        src_lang=source_language_code,
+        tgt_lang=target_language_code,
+    )
+    out_text = str(out_texts[0])
+    out_wav = out_audios.audio_wavs[0].cpu().detach().numpy()
+    return (int(AUDIO_SAMPLE_RATE), out_wav), out_text
+def run_t2tt(input_text: str, source_language: str, target_language: str) -> str:
+    source_language_code = LANGUAGE_NAME_TO_CODE[source_language]
+    target_language_code = LANGUAGE_NAME_TO_CODE[target_language]
+    out_texts, _ = translator.predict(
+        input=input_text,
+        task_str="T2TT",
+        src_lang=source_language_code,
+        tgt_lang=target_language_code,
+    )
+    return str(out_texts[0])
+def run_asr(input_audio: str, target_language: str) -> str:
+    preprocess_audio(input_audio)
+    target_language_code = LANGUAGE_NAME_TO_CODE[target_language]
+    out_texts, _ = translator.predict(
+        input=input_audio,
+        task_str="ASR",
+        src_lang=target_language_code,
+        tgt_lang=target_language_code,
+    )
+    return str(out_texts[0])
+# S2ST tab: audio file plus source/target language in; translated audio and text out.
+# The component names bound here (input_audio, source_language, ...) are module-level
+# and are rebound by the later tab definitions in this file.
+with gr.Blocks() as demo_s2st:
+    with gr.Row():
+        with gr.Column():
+            with gr.Group():
+                # type="filepath": run_s2st receives the audio as a path string.
+                input_audio = gr.Audio(label="Input speech", type="filepath")
+                source_language = gr.Dropdown(
+                    label="Source language",
+                    choices=ASR_TARGET_LANGUAGE_NAMES,
+                    value="English",
+                )
+                target_language = gr.Dropdown(
+                    label="Target language",
+                    choices=S2ST_TARGET_LANGUAGE_NAMES,
+                    value=DEFAULT_TARGET_LANGUAGE,
+                )
+            btn = gr.Button("Translate")
+        with gr.Column():
+            with gr.Group():
+                output_audio = gr.Audio(
+                    label="Translated speech",
+                    autoplay=False,
+                    streaming=False,
+                    type="numpy",
+                )
+                output_text = gr.Textbox(label="Translated text")
+    # Example rows are ordered as [audio path, source language, target language].
+    gr.Examples(
+        examples=[
+            ["assets/sample_input.mp3", "English", "French"],
+            ["assets/sample_input.mp3", "English", "Mandarin Chinese"],
+            ["assets/sample_input_2.mp3", "English", "Hindi"],
+            ["assets/sample_input_2.mp3", "English", "Spanish"],
+        ],
+        inputs=[input_audio, source_language, target_language],
+        outputs=[output_audio, output_text],
+        fn=run_s2st,
+        cache_examples=CACHE_EXAMPLES,
+        api_name=False,
+    )
+    # Button click runs the translation; registered under the API name "s2st".
+    btn.click(
+        fn=run_s2st,
+        inputs=[input_audio, source_language, target_language],
+        outputs=[output_audio, output_text],
+        api_name="s2st",
+    )
+# S2TT tab: audio file plus source/target language in; translated text out.
+with gr.Blocks() as demo_s2tt:
+    with gr.Row():
+        with gr.Column():
+            with gr.Group():
+                # type="filepath": run_s2tt receives the audio as a path string.
+                input_audio = gr.Audio(label="Input speech", type="filepath")
+                source_language = gr.Dropdown(
+                    label="Source language",
+                    choices=ASR_TARGET_LANGUAGE_NAMES,
+                    value="English",
+                )
+                target_language = gr.Dropdown(
+                    label="Target language",
+                    choices=S2TT_TARGET_LANGUAGE_NAMES,
+                    value=DEFAULT_TARGET_LANGUAGE,
+                )
+            btn = gr.Button("Translate")
+        with gr.Column():
+            output_text = gr.Textbox(label="Translated text")
+    # Example rows are ordered as [audio path, source language, target language].
+    gr.Examples(
+        examples=[
+            ["assets/sample_input.mp3", "English", "French"],
+            ["assets/sample_input.mp3", "English", "Mandarin Chinese"],
+            ["assets/sample_input_2.mp3", "English", "Hindi"],
+            ["assets/sample_input_2.mp3", "English", "Spanish"],
+        ],
+        inputs=[input_audio, source_language, target_language],
+        outputs=output_text,
+        fn=run_s2tt,
+        cache_examples=CACHE_EXAMPLES,
+        api_name=False,
+    )
+    # Button click runs the translation; registered under the API name "s2tt".
+    btn.click(
+        fn=run_s2tt,
+        inputs=[input_audio, source_language, target_language],
+        outputs=output_text,
+        api_name="s2tt",
+    )
+# T2ST tab: text plus source/target language in; translated audio and text out.
+with gr.Blocks() as demo_t2st:
+    with gr.Row():
+        with gr.Column():
+            with gr.Group():
+                input_text = gr.Textbox(label="Input text")
+                with gr.Row():
+                    source_language = gr.Dropdown(
+                        label="Source language",
+                        choices=TEXT_SOURCE_LANGUAGE_NAMES,
+                        value="English",
+                    )
+                    target_language = gr.Dropdown(
+                        label="Target language",
+                        choices=T2ST_TARGET_LANGUAGE_NAMES,
+                        value=DEFAULT_TARGET_LANGUAGE,
+                    )
+            btn = gr.Button("Translate")
+        with gr.Column():
+            with gr.Group():
+                output_audio = gr.Audio(
+                    label="Translated speech",
+                    autoplay=False,
+                    streaming=False,
+                    type="numpy",
+                )
+                output_text = gr.Textbox(label="Translated text")
+    # Example rows are ordered as [input text, source language, target language].
+    gr.Examples(
+        examples=[
+            [
+                "My favorite animal is the elephant.",
+                "English",
+                "French",
+            ],
+            [
+                "My favorite animal is the elephant.",
+                "English",
+                "Mandarin Chinese",
+            ],
+            [
+                "Meta AI's Seamless M4T model is democratising spoken communication across language barriers",
+                "English",
+                "Hindi",
+            ],
+            [
+                "Meta AI's Seamless M4T model is democratising spoken communication across language barriers",
+                "English",
+                "Spanish",
+            ],
+        ],
+        inputs=[input_text, source_language, target_language],
+        outputs=[output_audio, output_text],
+        fn=run_t2st,
+        cache_examples=CACHE_EXAMPLES,
+        api_name=False,
+    )
+    # Both submitting the textbox and clicking the button run the translation ("t2st" API name).
+    gr.on(
+        triggers=[input_text.submit, btn.click],
+        fn=run_t2st,
+        inputs=[input_text, source_language, target_language],
+        outputs=[output_audio, output_text],
+        api_name="t2st",
+    )
+# T2TT tab: text plus source/target language in; translated text out.
+with gr.Blocks() as demo_t2tt:
+    with gr.Row():
+        with gr.Column():
+            with gr.Group():
+                input_text = gr.Textbox(label="Input text")
+                with gr.Row():
+                    source_language = gr.Dropdown(
+                        label="Source language",
+                        choices=TEXT_SOURCE_LANGUAGE_NAMES,
+                        value="English",
+                    )
+                    target_language = gr.Dropdown(
+                        label="Target language",
+                        choices=T2TT_TARGET_LANGUAGE_NAMES,
+                        value=DEFAULT_TARGET_LANGUAGE,
+                    )
+            btn = gr.Button("Translate")
+        with gr.Column():
+            output_text = gr.Textbox(label="Translated text")
+    # Example rows are ordered as [input text, source language, target language].
+    gr.Examples(
+        examples=[
+            [
+                "My favorite animal is the elephant.",
+                "English",
+                "French",
+            ],
+            [
+                "My favorite animal is the elephant.",
+                "English",
+                "Mandarin Chinese",
+            ],
+            [
+                "Meta AI's Seamless M4T model is democratising spoken communication across language barriers",
+                "English",
+                "Hindi",
+            ],
+            [
+                "Meta AI's Seamless M4T model is democratising spoken communication across language barriers",
+                "English",
+                "Spanish",
+            ],
+        ],
+        inputs=[input_text, source_language, target_language],
+        outputs=output_text,
+        fn=run_t2tt,
+        cache_examples=CACHE_EXAMPLES,
+        api_name=False,
+    )
+    # Both submitting the textbox and clicking the button run the translation ("t2tt" API name).
+    gr.on(
+        triggers=[input_text.submit, btn.click],
+        fn=run_t2tt,
+        inputs=[input_text, source_language, target_language],
+        outputs=output_text,
+        api_name="t2tt",
+    )
+# ASR tab: audio file plus a single language in; text out.
+# run_asr passes the selected language as both the source and the target language.
+with gr.Blocks() as demo_asr:
+    with gr.Row():
+        with gr.Column():
+            with gr.Group():
+                # type="filepath": run_asr receives the audio as a path string.
+                input_audio = gr.Audio(label="Input speech", type="filepath")
+                target_language = gr.Dropdown(
+                    label="Target language",
+                    choices=ASR_TARGET_LANGUAGE_NAMES,
+                    value=DEFAULT_TARGET_LANGUAGE,
+                )
+            btn = gr.Button("Translate")
+        with gr.Column():
+            output_text = gr.Textbox(label="Translated text")
+    # Example rows are ordered as [audio path, language].
+    gr.Examples(
+        examples=[
+            ["assets/sample_input.mp3", "English"],
+            ["assets/sample_input_2.mp3", "English"],
+        ],
+        inputs=[input_audio, target_language],
+        outputs=output_text,
+        fn=run_asr,
+        cache_examples=CACHE_EXAMPLES,
+        api_name=False,
+    )
+    # Button click runs recognition; registered under the API name "asr".
+    btn.click(
+        fn=run_asr,
+        inputs=[input_audio, target_language],
+        outputs=output_text,
+        api_name="asr",
+    )
+# Top-level page: description, optional duplicate button, and one tab per task.
+with gr.Blocks(css="style.css") as demo:
+    gr.Markdown(DESCRIPTION)
+    # The button is visible only when the SHOW_DUPLICATE_BUTTON env var equals "1".
+    gr.DuplicateButton(
+        value="Duplicate Space for private use",
+        elem_id="duplicate-button",
+        visible=os.getenv("SHOW_DUPLICATE_BUTTON") == "1",
+    )
+    # Each previously built Blocks object is rendered inside its own tab.
+    with gr.Tabs():
+        with gr.Tab(label="S2ST"):
+            demo_s2st.render()
+        with gr.Tab(label="S2TT"):
+            demo_s2tt.render()
+        with gr.Tab(label="T2ST"):
+            demo_t2st.render()
+        with gr.Tab(label="T2TT"):
+            demo_t2tt.render()
+        with gr.Tab(label="ASR"):
+            demo_asr.render()
+if __name__ == "__main__":
+    # Enable the request queue (max_size=50), then start the app.
+    demo.queue(max_size=50).launch()

assets/sample_input.mp3 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:982369687f05bf8fcd6923c4ffcccda0fcce92f44eceae5a9d00a431f07ea87b
+size 10272

assets/sample_input_2.mp3 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:6a505a4641e3f5f0ddec9508832793aa20e63d2545530b66bc04a9bd19a742e6
+size 30624

lang_list.py ADDED Viewed

	@@ -0,0 +1,255 @@

+# Language dict
+language_code_to_name = {
+    "afr": "Afrikaans",
+    "amh": "Amharic",
+    "arb": "Modern Standard Arabic",
+    "ary": "Moroccan Arabic",
+    "arz": "Egyptian Arabic",
+    "asm": "Assamese",
+    "ast": "Asturian",
+    "azj": "North Azerbaijani",
+    "bel": "Belarusian",
+    "ben": "Bengali",
+    "bos": "Bosnian",
+    "bul": "Bulgarian",
+    "cat": "Catalan",
+    "ceb": "Cebuano",
+    "ces": "Czech",
+    "ckb": "Central Kurdish",
+    "cmn": "Mandarin Chinese",
+    "cym": "Welsh",
+    "dan": "Danish",
+    "deu": "German",
+    "ell": "Greek",
+    "eng": "English",
+    "est": "Estonian",
+    "eus": "Basque",
+    "fin": "Finnish",
+    "fra": "French",
+    "gaz": "West Central Oromo",
+    "gle": "Irish",
+    "glg": "Galician",
+    "guj": "Gujarati",
+    "heb": "Hebrew",
+    "hin": "Hindi",
+    "hrv": "Croatian",
+    "hun": "Hungarian",
+    "hye": "Armenian",
+    "ibo": "Igbo",
+    "ind": "Indonesian",
+    "isl": "Icelandic",
+    "ita": "Italian",
+    "jav": "Javanese",
+    "jpn": "Japanese",
+    "kam": "Kamba",
+    "kan": "Kannada",
+    "kat": "Georgian",
+    "kaz": "Kazakh",
+    "kea": "Kabuverdianu",
+    "khk": "Halh Mongolian",
+    "khm": "Khmer",
+    "kir": "Kyrgyz",
+    "kor": "Korean",
+    "lao": "Lao",
+    "lit": "Lithuanian",
+    "ltz": "Luxembourgish",
+    "lug": "Ganda",
+    "luo": "Luo",
+    "lvs": "Standard Latvian",
+    "mai": "Maithili",
+    "mal": "Malayalam",
+    "mar": "Marathi",
+    "mkd": "Macedonian",
+    "mlt": "Maltese",
+    "mni": "Meitei",
+    "mya": "Burmese",
+    "nld": "Dutch",
+    "nno": "Norwegian Nynorsk",
+    "nob": "Norwegian Bokm\u00e5l",
+    "npi": "Nepali",
+    "nya": "Nyanja",
+    "oci": "Occitan",
+    "ory": "Odia",
+    "pan": "Punjabi",
+    "pbt": "Southern Pashto",
+    "pes": "Western Persian",
+    "pol": "Polish",
+    "por": "Portuguese",
+    "ron": "Romanian",
+    "rus": "Russian",
+    "slk": "Slovak",
+    "slv": "Slovenian",
+    "sna": "Shona",
+    "snd": "Sindhi",
+    "som": "Somali",
+    "spa": "Spanish",
+    "srp": "Serbian",
+    "swe": "Swedish",
+    "swh": "Swahili",
+    "tam": "Tamil",
+    "tel": "Telugu",
+    "tgk": "Tajik",
+    "tgl": "Tagalog",
+    "tha": "Thai",
+    "tur": "Turkish",
+    "ukr": "Ukrainian",
+    "urd": "Urdu",
+    "uzn": "Northern Uzbek",
+    "vie": "Vietnamese",
+    "xho": "Xhosa",
+    "yor": "Yoruba",
+    "yue": "Cantonese",
+    "zlm": "Colloquial Malay",
+    "zsm": "Standard Malay",
+    "zul": "Zulu",
+}
+LANGUAGE_NAME_TO_CODE = {v: k for k, v in language_code_to_name.items()}
+# Source langs: S2ST / S2TT / ASR don't need source lang
+# T2TT / T2ST use this
+text_source_language_codes = [
+    "afr",
+    "amh",
+    "arb",
+    "ary",
+    "arz",
+    "asm",
+    "azj",
+    "bel",
+    "ben",
+    "bos",
+    "bul",
+    "cat",
+    "ceb",
+    "ces",
+    "ckb",
+    "cmn",
+    "cym",
+    "dan",
+    "deu",
+    "ell",
+    "eng",
+    "est",
+    "eus",
+    "fin",
+    "fra",
+    "gaz",
+    "gle",
+    "glg",
+    "guj",
+    "heb",
+    "hin",
+    "hrv",
+    "hun",
+    "hye",
+    "ibo",
+    "ind",
+    "isl",
+    "ita",
+    "jav",
+    "jpn",
+    "kan",
+    "kat",
+    "kaz",
+    "khk",
+    "khm",
+    "kir",
+    "kor",
+    "lao",
+    "lit",
+    "lug",
+    "luo",
+    "lvs",
+    "mai",
+    "mal",
+    "mar",
+    "mkd",
+    "mlt",
+    "mni",
+    "mya",
+    "nld",
+    "nno",
+    "nob",
+    "npi",
+    "nya",
+    "ory",
+    "pan",
+    "pbt",
+    "pes",
+    "pol",
+    "por",
+    "ron",
+    "rus",
+    "slk",
+    "slv",
+    "sna",
+    "snd",
+    "som",
+    "spa",
+    "srp",
+    "swe",
+    "swh",
+    "tam",
+    "tel",
+    "tgk",
+    "tgl",
+    "tha",
+    "tur",
+    "ukr",
+    "urd",
+    "uzn",
+    "vie",
+    "yor",
+    "yue",
+    "zsm",
+    "zul",
+]
+TEXT_SOURCE_LANGUAGE_NAMES = sorted([language_code_to_name[code] for code in text_source_language_codes])
+# Target langs:
+# S2ST / T2ST
+s2st_target_language_codes = [
+    "eng",
+    "arb",
+    "ben",
+    "cat",
+    "ces",
+    "cmn",
+    "cym",
+    "dan",
+    "deu",
+    "est",
+    "fin",
+    "fra",
+    "hin",
+    "ind",
+    "ita",
+    "jpn",
+    "kor",
+    "mlt",
+    "nld",
+    "pes",
+    "pol",
+    "por",
+    "ron",
+    "rus",
+    "slk",
+    "spa",
+    "swe",
+    "swh",
+    "tel",
+    "tgl",
+    "tha",
+    "tur",
+    "ukr",
+    "urd",
+    "uzn",
+    "vie",
+]
+S2ST_TARGET_LANGUAGE_NAMES = sorted([language_code_to_name[code] for code in s2st_target_language_codes])
+T2ST_TARGET_LANGUAGE_NAMES = S2ST_TARGET_LANGUAGE_NAMES
+# S2TT / T2TT / ASR
+S2TT_TARGET_LANGUAGE_NAMES = TEXT_SOURCE_LANGUAGE_NAMES
+T2TT_TARGET_LANGUAGE_NAMES = TEXT_SOURCE_LANGUAGE_NAMES
+ASR_TARGET_LANGUAGE_NAMES = TEXT_SOURCE_LANGUAGE_NAMES

requirements.txt ADDED Viewed

	@@ -0,0 +1,4 @@

+gradio==4.5.0
+omegaconf==2.3.0
+torch==2.1.0
+torchaudio==2.1.0

style.css ADDED Viewed

	@@ -0,0 +1,10 @@

+h1 {
+  text-align: center;
+}
+#duplicate-button {
+  margin: auto;
+  color: #fff;
+  background: #1565c0;
+  border-radius: 100vh;
+}

whl/seamless_communication-1.0.0-py3-none-any.whl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1df10e0c85ee0ffbc9f2e1bf8896850a52c551383df0332a94d26d9d39770c85
+size 201552