Spaces:
Running
Running
Add large-v1 and large-v2 to CLIs
Browse files
app.py
CHANGED
@@ -57,6 +57,8 @@ LANGUAGES = [
|
|
57 |
"Hausa", "Bashkir", "Javanese", "Sundanese"
|
58 |
]
|
59 |
|
|
|
|
|
60 |
class WhisperTranscriber:
|
61 |
def __init__(self, input_audio_max_duration: float = DEFAULT_INPUT_AUDIO_MAX_DURATION, vad_process_timeout: float = None, vad_cpu_cores: int = 1, delete_uploaded_files: bool = DELETE_UPLOADED_FILES):
|
62 |
self.model_cache = ModelCache()
|
@@ -349,7 +351,7 @@ def create_ui(input_audio_max_duration, share=False, server_name: str = None, se
|
|
349 |
ui_article = "Read the [documentation here](https://huggingface.co/spaces/aadnk/whisper-webui/blob/main/docs/options.md)"
|
350 |
|
351 |
demo = gr.Interface(fn=ui.transcribe_webui, description=ui_description, article=ui_article, inputs=[
|
352 |
-
gr.Dropdown(choices=["tiny", "base", "small", "medium", "large"], value=default_model_name, label="Model"),
|
353 |
gr.Dropdown(choices=sorted(LANGUAGES), label="Language"),
|
354 |
gr.Text(label="URL (YouTube, etc.)"),
|
355 |
gr.File(label="Upload Files", file_count="multiple"),
|
@@ -377,7 +379,7 @@ if __name__ == '__main__':
|
|
377 |
parser.add_argument("--share", type=bool, default=False, help="True to share the app on HuggingFace.")
|
378 |
parser.add_argument("--server_name", type=str, default=None, help="The host or IP to bind to. If None, bind to localhost.")
|
379 |
parser.add_argument("--server_port", type=int, default=7860, help="The port to bind to.")
|
380 |
-
parser.add_argument("--default_model_name", type=str, default="medium", help="The default model name.")
|
381 |
parser.add_argument("--default_vad", type=str, default="silero-vad", help="The default VAD.")
|
382 |
parser.add_argument("--vad_parallel_devices", type=str, default="", help="A commma delimited list of CUDA devices to use for parallel processing. If None, disable parallel processing.")
|
383 |
parser.add_argument("--vad_cpu_cores", type=int, default=1, help="The number of CPU cores to use for VAD pre-processing.")
|
|
|
57 |
"Hausa", "Bashkir", "Javanese", "Sundanese"
|
58 |
]
|
59 |
|
60 |
+
WHISPER_MODELS = ["tiny", "base", "small", "medium", "large", "large-v1", "large-v2"]
|
61 |
+
|
62 |
class WhisperTranscriber:
|
63 |
def __init__(self, input_audio_max_duration: float = DEFAULT_INPUT_AUDIO_MAX_DURATION, vad_process_timeout: float = None, vad_cpu_cores: int = 1, delete_uploaded_files: bool = DELETE_UPLOADED_FILES):
|
64 |
self.model_cache = ModelCache()
|
|
|
351 |
ui_article = "Read the [documentation here](https://huggingface.co/spaces/aadnk/whisper-webui/blob/main/docs/options.md)"
|
352 |
|
353 |
demo = gr.Interface(fn=ui.transcribe_webui, description=ui_description, article=ui_article, inputs=[
|
354 |
+
gr.Dropdown(choices=WHISPER_MODELS, value=default_model_name, label="Model"),
|
355 |
gr.Dropdown(choices=sorted(LANGUAGES), label="Language"),
|
356 |
gr.Text(label="URL (YouTube, etc.)"),
|
357 |
gr.File(label="Upload Files", file_count="multiple"),
|
|
|
379 |
parser.add_argument("--share", type=bool, default=False, help="True to share the app on HuggingFace.")
|
380 |
parser.add_argument("--server_name", type=str, default=None, help="The host or IP to bind to. If None, bind to localhost.")
|
381 |
parser.add_argument("--server_port", type=int, default=7860, help="The port to bind to.")
|
382 |
+
parser.add_argument("--default_model_name", type=str, choices=WHISPER_MODELS, default="medium", help="The default model name.")
|
383 |
parser.add_argument("--default_vad", type=str, default="silero-vad", help="The default VAD.")
|
384 |
parser.add_argument("--vad_parallel_devices", type=str, default="", help="A commma delimited list of CUDA devices to use for parallel processing. If None, disable parallel processing.")
|
385 |
parser.add_argument("--vad_cpu_cores", type=int, default=1, help="The number of CPU cores to use for VAD pre-processing.")
|
cli.py
CHANGED
@@ -6,7 +6,7 @@ import warnings
|
|
6 |
import numpy as np
|
7 |
|
8 |
import torch
|
9 |
-
from app import LANGUAGES, WhisperTranscriber
|
10 |
from src.download import download_url
|
11 |
|
12 |
from src.utils import optional_float, optional_int, str2bool
|
@@ -15,7 +15,7 @@ from src.whisperContainer import WhisperContainer
|
|
15 |
def cli():
|
16 |
parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
|
17 |
parser.add_argument("audio", nargs="+", type=str, help="audio file(s) to transcribe")
|
18 |
-
parser.add_argument("--model", default="small", choices=["tiny", "base", "small", "medium", "large"], help="name of the Whisper model to use")
|
19 |
parser.add_argument("--model_dir", type=str, default=None, help="the path to save model files; uses ~/.cache/whisper by default")
|
20 |
parser.add_argument("--device", default="cuda" if torch.cuda.is_available() else "cpu", help="device to use for PyTorch inference")
|
21 |
parser.add_argument("--output_dir", "-o", type=str, default=".", help="directory to save the outputs")
|
|
|
6 |
import numpy as np
|
7 |
|
8 |
import torch
|
9 |
+
from app import LANGUAGES, WHISPER_MODELS, WhisperTranscriber
|
10 |
from src.download import download_url
|
11 |
|
12 |
from src.utils import optional_float, optional_int, str2bool
|
|
|
15 |
def cli():
|
16 |
parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
|
17 |
parser.add_argument("audio", nargs="+", type=str, help="audio file(s) to transcribe")
|
18 |
+
parser.add_argument("--model", default="small", choices=WHISPER_MODELS, help="name of the Whisper model to use")
|
19 |
parser.add_argument("--model_dir", type=str, default=None, help="the path to save model files; uses ~/.cache/whisper by default")
|
20 |
parser.add_argument("--device", default="cuda" if torch.cuda.is_available() else "cpu", help="device to use for PyTorch inference")
|
21 |
parser.add_argument("--output_dir", "-o", type=str, default=".", help="directory to save the outputs")
|