chingus committed on
Commit
455359d
·
1 Parent(s): c89eb2d

updated dependencies

Browse files
Files changed (2) hide show
  1. app.py +9 -116
  2. assets.py +110 -0
app.py CHANGED
@@ -1,7 +1,6 @@
1
- # # import os
2
- # # os.system("pip install git+https://github.com/openai/whisper.git")
3
- # import gradio as gr
4
- # import whisper
5
 
6
  import gradio as gr
7
  import whisper
@@ -10,109 +9,8 @@ import os
10
  import numpy as np
11
  from datetime import datetime
12
 
13
- LANGUAGES = {
14
- "en": "english",
15
- "zh": "chinese",
16
- "de": "german",
17
- "es": "spanish",
18
- "ru": "russian",
19
- "ko": "korean",
20
- "fr": "french",
21
- "ja": "japanese",
22
- "pt": "portuguese",
23
- "tr": "turkish",
24
- "pl": "polish",
25
- "ca": "catalan",
26
- "nl": "dutch",
27
- "ar": "arabic",
28
- "sv": "swedish",
29
- "it": "italian",
30
- "id": "indonesian",
31
- "hi": "hindi",
32
- "fi": "finnish",
33
- "vi": "vietnamese",
34
- "iw": "hebrew",
35
- "uk": "ukrainian",
36
- "el": "greek",
37
- "ms": "malay",
38
- "cs": "czech",
39
- "ro": "romanian",
40
- "da": "danish",
41
- "hu": "hungarian",
42
- "ta": "tamil",
43
- "no": "norwegian",
44
- "th": "thai",
45
- "ur": "urdu",
46
- "hr": "croatian",
47
- "bg": "bulgarian",
48
- "lt": "lithuanian",
49
- "la": "latin",
50
- "mi": "maori",
51
- "ml": "malayalam",
52
- "cy": "welsh",
53
- "sk": "slovak",
54
- "te": "telugu",
55
- "fa": "persian",
56
- "lv": "latvian",
57
- "bn": "bengali",
58
- "sr": "serbian",
59
- "az": "azerbaijani",
60
- "sl": "slovenian",
61
- "kn": "kannada",
62
- "et": "estonian",
63
- "mk": "macedonian",
64
- "br": "breton",
65
- "eu": "basque",
66
- "is": "icelandic",
67
- "hy": "armenian",
68
- "ne": "nepali",
69
- "mn": "mongolian",
70
- "bs": "bosnian",
71
- "kk": "kazakh",
72
- "sq": "albanian",
73
- "sw": "swahili",
74
- "gl": "galician",
75
- "mr": "marathi",
76
- "pa": "punjabi",
77
- "si": "sinhala",
78
- "km": "khmer",
79
- "sn": "shona",
80
- "yo": "yoruba",
81
- "so": "somali",
82
- "af": "afrikaans",
83
- "oc": "occitan",
84
- "ka": "georgian",
85
- "be": "belarusian",
86
- "tg": "tajik",
87
- "sd": "sindhi",
88
- "gu": "gujarati",
89
- "am": "amharic",
90
- "yi": "yiddish",
91
- "lo": "lao",
92
- "uz": "uzbek",
93
- "fo": "faroese",
94
- "ht": "haitian creole",
95
- "ps": "pashto",
96
- "tk": "turkmen",
97
- "nn": "nynorsk",
98
- "mt": "maltese",
99
- "sa": "sanskrit",
100
- "lb": "luxembourgish",
101
- "my": "myanmar",
102
- "bo": "tibetan",
103
- "tl": "tagalog",
104
- "mg": "malagasy",
105
- "as": "assamese",
106
- "tt": "tatar",
107
- "haw": "hawaiian",
108
- "ln": "lingala",
109
- "ha": "hausa",
110
- "ba": "bashkir",
111
- "jw": "javanese",
112
- "su": "sundanese",
113
- }
114
-
115
- lang_detect = ['tiny', 'base', 'small', 'medium', 'large']
116
  def sendToWhisper(audio_record, audio_upload, task, models_selected, language_toggle, language_selected, without_timestamps):
117
  results = []
118
 
@@ -136,7 +34,7 @@ def sendToWhisper(audio_record, audio_upload, task, models_selected, language_to
136
  options = whisper.DecodingOptions(fp16 = False, without_timestamps=without_timestamps, task=task, language=language_selected)
137
  language = ""
138
  prob = 0
139
- if model_name in lang_detect:
140
  _, probs = model.detect_language(mel)
141
  language = max(probs, key=probs.get)
142
  prob = probs[language]
@@ -149,14 +47,9 @@ def sendToWhisper(audio_record, audio_upload, task, models_selected, language_to
149
 
150
  avail_models = whisper.available_models()
151
 
152
- css = """
153
- #audio_inputs{
154
- height:100px;
155
- max-height:100px;
156
- }
157
- """
158
 
159
- with gr.Blocks(css=css) as demo:
 
160
  gr.Markdown("This is a demo to use Open AI's Speech to Text (ASR) Model: Whisper. Learn more about the models here on [Github](https://github.com/openai/whisper/search?q=DecodingOptions&type=) FYI: The larger models take a lot longer to transcribe the text :)")
161
  gr.Markdown("Here are sample audio files to try out: [Sample Audio](https://drive.google.com/drive/folders/1qYek06ZVeKr9f5Jf35eqi-9CnjNIp98u?usp=sharing)")
162
  gr.Markdown("Built by:[@davidtsong](https://twitter.com/davidtsong)")
@@ -175,7 +68,7 @@ with gr.Blocks(css=css) as demo:
175
  with gr.Accordion("Settings", open=False):
176
  task = gr.Dropdown(["transcribe", "translate"], label="Task", value="transcribe")
177
  language_toggle = gr.Dropdown(["Automatic", "Manual"], label="Language Selection", value="Automatic")
178
- language_selected = gr.Dropdown(list(LANGUAGES.keys()), label="Language")
179
  without_timestamps = gr.Checkbox(label="Without timestamps",value=True)
180
  submit = gr.Button(label="Run")
181
 
 
1
+ import os
2
+ os.system("pip install git+https://github.com/openai/whisper.git")
3
+
 
4
 
5
  import gradio as gr
6
  import whisper
 
9
  import numpy as np
10
  from datetime import datetime
11
 
12
+ import assets
13
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14
  def sendToWhisper(audio_record, audio_upload, task, models_selected, language_toggle, language_selected, without_timestamps):
15
  results = []
16
 
 
34
  options = whisper.DecodingOptions(fp16 = False, without_timestamps=without_timestamps, task=task, language=language_selected)
35
  language = ""
36
  prob = 0
37
+ if model_name in assets.lang_detect:
38
  _, probs = model.detect_language(mel)
39
  language = max(probs, key=probs.get)
40
  prob = probs[language]
 
47
 
48
  avail_models = whisper.available_models()
49
 
 
 
 
 
 
 
50
 
51
+
52
+ with gr.Blocks(css=assets.css) as demo:
53
  gr.Markdown("This is a demo to use Open AI's Speech to Text (ASR) Model: Whisper. Learn more about the models here on [Github](https://github.com/openai/whisper/search?q=DecodingOptions&type=) FYI: The larger models take a lot longer to transcribe the text :)")
54
  gr.Markdown("Here are sample audio files to try out: [Sample Audio](https://drive.google.com/drive/folders/1qYek06ZVeKr9f5Jf35eqi-9CnjNIp98u?usp=sharing)")
55
  gr.Markdown("Built by:[@davidtsong](https://twitter.com/davidtsong)")
 
68
  with gr.Accordion("Settings", open=False):
69
  task = gr.Dropdown(["transcribe", "translate"], label="Task", value="transcribe")
70
  language_toggle = gr.Dropdown(["Automatic", "Manual"], label="Language Selection", value="Automatic")
71
+ language_selected = gr.Dropdown(list(assets.LANGUAGES.keys()), label="Language")
72
  without_timestamps = gr.Checkbox(label="Without timestamps",value=True)
73
  submit = gr.Button(label="Run")
74
 
assets.py ADDED
@@ -0,0 +1,110 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ LANGUAGES = {
2
+ "en": "english",
3
+ "zh": "chinese",
4
+ "de": "german",
5
+ "es": "spanish",
6
+ "ru": "russian",
7
+ "ko": "korean",
8
+ "fr": "french",
9
+ "ja": "japanese",
10
+ "pt": "portuguese",
11
+ "tr": "turkish",
12
+ "pl": "polish",
13
+ "ca": "catalan",
14
+ "nl": "dutch",
15
+ "ar": "arabic",
16
+ "sv": "swedish",
17
+ "it": "italian",
18
+ "id": "indonesian",
19
+ "hi": "hindi",
20
+ "fi": "finnish",
21
+ "vi": "vietnamese",
22
+ "iw": "hebrew",
23
+ "uk": "ukrainian",
24
+ "el": "greek",
25
+ "ms": "malay",
26
+ "cs": "czech",
27
+ "ro": "romanian",
28
+ "da": "danish",
29
+ "hu": "hungarian",
30
+ "ta": "tamil",
31
+ "no": "norwegian",
32
+ "th": "thai",
33
+ "ur": "urdu",
34
+ "hr": "croatian",
35
+ "bg": "bulgarian",
36
+ "lt": "lithuanian",
37
+ "la": "latin",
38
+ "mi": "maori",
39
+ "ml": "malayalam",
40
+ "cy": "welsh",
41
+ "sk": "slovak",
42
+ "te": "telugu",
43
+ "fa": "persian",
44
+ "lv": "latvian",
45
+ "bn": "bengali",
46
+ "sr": "serbian",
47
+ "az": "azerbaijani",
48
+ "sl": "slovenian",
49
+ "kn": "kannada",
50
+ "et": "estonian",
51
+ "mk": "macedonian",
52
+ "br": "breton",
53
+ "eu": "basque",
54
+ "is": "icelandic",
55
+ "hy": "armenian",
56
+ "ne": "nepali",
57
+ "mn": "mongolian",
58
+ "bs": "bosnian",
59
+ "kk": "kazakh",
60
+ "sq": "albanian",
61
+ "sw": "swahili",
62
+ "gl": "galician",
63
+ "mr": "marathi",
64
+ "pa": "punjabi",
65
+ "si": "sinhala",
66
+ "km": "khmer",
67
+ "sn": "shona",
68
+ "yo": "yoruba",
69
+ "so": "somali",
70
+ "af": "afrikaans",
71
+ "oc": "occitan",
72
+ "ka": "georgian",
73
+ "be": "belarusian",
74
+ "tg": "tajik",
75
+ "sd": "sindhi",
76
+ "gu": "gujarati",
77
+ "am": "amharic",
78
+ "yi": "yiddish",
79
+ "lo": "lao",
80
+ "uz": "uzbek",
81
+ "fo": "faroese",
82
+ "ht": "haitian creole",
83
+ "ps": "pashto",
84
+ "tk": "turkmen",
85
+ "nn": "nynorsk",
86
+ "mt": "maltese",
87
+ "sa": "sanskrit",
88
+ "lb": "luxembourgish",
89
+ "my": "myanmar",
90
+ "bo": "tibetan",
91
+ "tl": "tagalog",
92
+ "mg": "malagasy",
93
+ "as": "assamese",
94
+ "tt": "tatar",
95
+ "haw": "hawaiian",
96
+ "ln": "lingala",
97
+ "ha": "hausa",
98
+ "ba": "bashkir",
99
+ "jw": "javanese",
100
+ "su": "sundanese",
101
+ }
102
+
103
+ lang_detect = ['tiny', 'base', 'small', 'medium', 'large']
104
+
105
+ css = """
106
+ #audio_inputs{
107
+ height:100px;
108
+ max-height:100px;
109
+ }
110
+ """