Spaces:

aadnk
/

whisper-webui

Runtime error

App Files Files Community

aadnk committed on Sep 22, 2022

Commit

05a2178

1 Parent(s): 23c3153

Initial commit

Browse files

Files changed (4) hide show

.gitignore +4 -0
app.py +141 -0
requirements.txt +2 -0
utils.py +54 -0

.gitignore ADDED Viewed

	@@ -0,0 +1,4 @@

+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class

app.py ADDED Viewed

	@@ -0,0 +1,141 @@

+from io import StringIO
+import gradio as gr
+from utils import write_vtt
+import whisper
+#import os
+#os.system("pip install git+https://github.com/openai/whisper.git")
+# Display names of the languages offered in the UI's "Language" dropdown
+# (the dropdown shows them sorted). greet() lower-cases the selected name
+# before passing it to model.transcribe() as the `language` argument.
+LANGUAGES = [
+    "English",
+    "Chinese",
+    "German",
+    "Spanish",
+    "Russian",
+    "Korean",
+    "French",
+    "Japanese",
+    "Portuguese",
+    "Turkish",
+    "Polish",
+    "Catalan",
+    "Dutch",
+    "Arabic",
+    "Swedish",
+    "Italian",
+    "Indonesian",
+    "Hindi",
+    "Finnish",
+    "Vietnamese",
+    "Hebrew",
+    "Ukrainian",
+    "Greek",
+    "Malay",
+    "Czech",
+    "Romanian",
+    "Danish",
+    "Hungarian",
+    "Tamil",
+    "Norwegian",
+    "Thai",
+    "Urdu",
+    "Croatian",
+    "Bulgarian",
+    "Lithuanian",
+    "Latin",
+    "Maori",
+    "Malayalam",
+    "Welsh",
+    "Slovak",
+    "Telugu",
+    "Persian",
+    "Latvian",
+    "Bengali",
+    "Serbian",
+    "Azerbaijani",
+    "Slovenian",
+    "Kannada",
+    "Estonian",
+    "Macedonian",
+    "Breton",
+    "Basque",
+    "Icelandic",
+    "Armenian",
+    "Nepali",
+    "Mongolian",
+    "Bosnian",
+    "Kazakh",
+    "Albanian",
+    "Swahili",
+    "Galician",
+    "Marathi",
+    "Punjabi",
+    "Sinhala",
+    "Khmer",
+    "Shona",
+    "Yoruba",
+    "Somali",
+    "Afrikaans",
+    "Occitan",
+    "Georgian",
+    "Belarusian",
+    "Tajik",
+    "Sindhi",
+    "Gujarati",
+    "Amharic",
+    "Yiddish",
+    "Lao",
+    "Uzbek",
+    "Faroese",
+    "Haitian Creole",
+    "Pashto",
+    "Turkmen",
+    "Nynorsk",
+    "Maltese",
+    "Sanskrit",
+    "Luxembourgish",
+    "Myanmar",
+    "Tibetan",
+    "Tagalog",
+    "Malagasy",
+    "Assamese",
+    "Tatar",
+    "Hawaiian",
+    "Lingala",
+    "Hausa",
+    "Bashkir",
+    "Javanese",
+    "Sundanese"
+]
+model_cache = dict()
+def greet(modelName, languageName, uploadFile, microphoneData, task):
+    source = uploadFile if uploadFile is not None else microphoneData
+    selectedLanguage = languageName.lower() if len(languageName) > 0 else None
+    selectedModel = modelName if modelName is not None else "base"
+    model = model_cache.get(selectedModel, None)
+    if not model:
+        model = whisper.load_model(selectedModel)
+        model_cache[selectedModel] = model
+    result = model.transcribe(source, language=selectedLanguage, task=task)
+    segmentStream = StringIO()
+    write_vtt(result["segments"], file=segmentStream)
+    segmentStream.seek(0)
+    return result["text"], segmentStream.read()
+demo = gr.Interface(fn=greet, description="Whisper is a general-purpose speech recognition model. It is trained on a large dataset of diverse audio and is also a multi-task model that can perform multilingual speech recognition as well as speech translation and language identification.", inputs=[
+    gr.Dropdown(choices=["tiny", "base", "small", "medium", "large"], value="medium", label="Model"),
+    gr.Dropdown(choices=sorted(LANGUAGES), label="Language"),
+    gr.Audio(source="upload", type="filepath", label="Upload Audio"),
+    gr.Audio(source="microphone", type="filepath", label="Microphone Input"),
+    gr.Dropdown(choices=["transcribe", "translate"], label="Task"),
+], outputs=[gr.Text(label="Transcription"), gr.Text(label="Segments")])
+demo.launch()

requirements.txt ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ git+https://github.com/openai/whisper.git
2	+ transformers

utils.py ADDED Viewed

	@@ -0,0 +1,54 @@

+import zlib
+from typing import Iterator, TextIO
+def exact_div(x, y):
+    assert x % y == 0
+    return x // y
+def str2bool(string):
+    str2val = {"True": True, "False": False}
+    if string in str2val:
+        return str2val[string]
+    else:
+        raise ValueError(f"Expected one of {set(str2val.keys())}, got {string}")
+def optional_int(string):
+    return None if string == "None" else int(string)
+def optional_float(string):
+    return None if string == "None" else float(string)
+def compression_ratio(text) -> float:
+    return len(text) / len(zlib.compress(text.encode("utf-8")))
+def format_timestamp(seconds: float):
+    assert seconds >= 0, "non-negative timestamp expected"
+    milliseconds = round(seconds * 1000.0)
+    hours = milliseconds // 3_600_000
+    milliseconds -= hours * 3_600_000
+    minutes = milliseconds // 60_000
+    milliseconds -= minutes * 60_000
+    seconds = milliseconds // 1_000
+    milliseconds -= seconds * 1_000
+    return (f"{hours}:" if hours > 0 else "") + f"{minutes:02d}:{seconds:02d}.{milliseconds:03d}"
+def write_vtt(transcript: Iterator[dict], file: TextIO):
+    print("WEBVTT\n", file=file)
+    for segment in transcript:
+        print(
+            f"{format_timestamp(segment['start'])} --> {format_timestamp(segment['end'])}\n"
+            f"{segment['text'].replace('-->', '->')}\n",
+            file=file,
+            flush=True,
+        )