Upgrade the project's Gradio version to 5.0.2.
- README.md +1 -1
- app.py +32 -28
- config.json5 +6 -0
- requirements-fasterWhisper.txt +4 -4
- requirements-whisper.txt +5 -4
- requirements.txt +4 -4
README.md CHANGED
@@ -4,7 +4,7 @@ emoji: ✨
 colorFrom: blue
 colorTo: purple
 sdk: gradio
-sdk_version:
+sdk_version: 5.0.2
 app_file: app.py
 pinned: false
 license: apache-2.0
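On Hugging Face Spaces, the `sdk_version` field in the README front matter selects the Gradio runtime the Space is built with, so it has to move in lockstep with the `gradio` pin in the requirements files below.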
app.py CHANGED
@@ -451,9 +451,10 @@ class WhisperTranscriber:
 
         download.insert(0, downloadAllPath)
 
-        filterLogText = [gr.Text.update(visible=False)]
+        filterLogText = [gr.Text(visible=False)] #[Gradio 5.x] AttributeError: type object 'Textbox' has no attribute 'update'
         if filterLogs:
-            filterLogText = [gr.Text.update(value=filterLogs, visible=True)]
+            filterLogText[0].visible = True
+            filterLogText[0].value = filterLogs
 
         return [download, text, vtt] + filterLogText
 
@@ -1035,36 +1036,36 @@ def create_ui(app_config: ApplicationConfig):
     madlad400_models = list(filter(lambda madlad400: "ct2" in madlad400, madlad400_models))
 
     common_whisper_inputs = lambda : {
-        gr.Dropdown(label="Whisper - Model (for audio)", choices=whisper_models, value=app_config.default_model_name, elem_id="whisperModelName"),
-        gr.Dropdown(label="Whisper - Language", choices=sorted(get_lang_whisper_names()), value=app_config.language, elem_id="whisperLangName"),
+        gr.Dropdown(label="Whisper - Model (for audio)", choices=whisper_models, value=app_config.default_model_name if app_config.default_model_name != None else [], elem_id="whisperModelName"),
+        gr.Dropdown(label="Whisper - Language", choices=sorted(get_lang_whisper_names()), value=app_config.language if app_config.language != None else [], elem_id="whisperLangName"),
     }
     common_m2m100_inputs = lambda : {
-        gr.Dropdown(label="M2M100 - Model (for translate)", choices=m2m100_models, elem_id="m2m100ModelName"),
-        gr.Dropdown(label="M2M100 - Language", choices=sorted(get_lang_m2m100_names()), elem_id="m2m100LangName"),
+        gr.Dropdown(label="M2M100 - Model (for translate)", choices=m2m100_models, value=[], elem_id="m2m100ModelName"),
+        gr.Dropdown(label="M2M100 - Language", choices=sorted(get_lang_m2m100_names()), value=[], elem_id="m2m100LangName"),
     }
     common_nllb_inputs = lambda : {
-        gr.Dropdown(label="NLLB - Model (for translate)", choices=nllb_models, elem_id="nllbModelName"),
-        gr.Dropdown(label="NLLB - Language", choices=sorted(get_lang_nllb_names()), elem_id="nllbLangName"),
+        gr.Dropdown(label="NLLB - Model (for translate)", choices=nllb_models, value=[], elem_id="nllbModelName"),
+        gr.Dropdown(label="NLLB - Language", choices=sorted(get_lang_nllb_names()), value=[], elem_id="nllbLangName"),
    }
     common_mt5_inputs = lambda : {
-        gr.Dropdown(label="MT5 - Model (for translate)", choices=mt5_models, elem_id="mt5ModelName"),
-        gr.Dropdown(label="MT5 - Language", choices=sorted(get_lang_m2m100_names(["en", "ja", "zh"])), elem_id="mt5LangName"),
+        gr.Dropdown(label="MT5 - Model (for translate)", choices=mt5_models, value=[], elem_id="mt5ModelName"),
+        gr.Dropdown(label="MT5 - Language", choices=sorted(get_lang_m2m100_names(["en", "ja", "zh"])), value=[], elem_id="mt5LangName"),
     }
     common_ALMA_inputs = lambda : {
-        gr.Dropdown(label="ALMA - Model (for translate)", choices=ALMA_models, elem_id="ALMAModelName"),
-        gr.Dropdown(label="ALMA - Language", choices=sort_lang_by_whisper_codes(["en", "de", "cs", "is", "ru", "zh", "ja"]), elem_id="ALMALangName"),
+        gr.Dropdown(label="ALMA - Model (for translate)", choices=ALMA_models, value=[], elem_id="ALMAModelName"),
+        gr.Dropdown(label="ALMA - Language", choices=sort_lang_by_whisper_codes(["en", "de", "cs", "is", "ru", "zh", "ja"]), value=[], elem_id="ALMALangName"),
     }
     common_madlad400_inputs = lambda : {
-        gr.Dropdown(label="madlad400 - Model (for translate)", choices=madlad400_models, elem_id="madlad400ModelName"),
-        gr.Dropdown(label="madlad400 - Language", choices=sorted(get_lang_m2m100_names()), elem_id="madlad400LangName"),
+        gr.Dropdown(label="madlad400 - Model (for translate)", choices=madlad400_models, value=[], elem_id="madlad400ModelName"),
+        gr.Dropdown(label="madlad400 - Language", choices=sorted(get_lang_m2m100_names()), value=[], elem_id="madlad400LangName"),
     }
     common_seamless_inputs = lambda : {
-        gr.Dropdown(label="seamless - Model (for translate)", choices=seamless_models, elem_id="seamlessModelName"),
-        gr.Dropdown(label="seamless - Language", choices=sorted(get_lang_seamlessT_Tx_names()), elem_id="seamlessLangName"),
+        gr.Dropdown(label="seamless - Model (for translate)", choices=seamless_models, value=[], elem_id="seamlessModelName"),
+        gr.Dropdown(label="seamless - Language", choices=sorted(get_lang_seamlessT_Tx_names()), value=[], elem_id="seamlessLangName"),
     }
     common_Llama_inputs = lambda : {
-        gr.Dropdown(label="Llama - Model (for translate)", choices=Llama_models, elem_id="LlamaModelName"),
-        gr.Dropdown(label="Llama - Language", choices=sorted(get_lang_m2m100_names()), elem_id="LlamaLangName"),
+        gr.Dropdown(label="Llama - Model (for translate)", choices=Llama_models, value=[], elem_id="LlamaModelName"),
+        gr.Dropdown(label="Llama - Language", choices=sorted(get_lang_m2m100_names()), value=[], elem_id="LlamaLangName"),
     }
 
     common_translation_inputs = lambda : {
@@ -1131,8 +1132,10 @@ def create_ui(app_config: ApplicationConfig):
 
     def create_transcribe(uiDescription: str, isQueueMode: bool, isFull: bool = False):
         with gr.Blocks() as transcribe:
-            translateInput = gr.State(value="m2m100", elem_id="translateInput")
-            sourceInput = gr.State(value="urlData", elem_id="sourceInput")
+            translateInput = gr.State(value="m2m100") # [Gradio 5.x] TypeError: State.__init__() got an unexpected keyword argument 'elem_id'
+            sourceInput = gr.State(value="urlData")
+            translateInput.elem_id = "translateInput"
+            sourceInput.elem_id = "sourceInput"
             gr.Markdown(uiDescription)
             with gr.Row():
                 with gr.Column():
@@ -1173,8 +1176,8 @@ def create_ui(app_config: ApplicationConfig):
                     inputDict.update({gr.Text(label="URL (YouTube, etc.)", elem_id = "urlData")})
                 with gr.Tab(label="Upload") as UploadTab:
                     inputDict.update({gr.File(label="Upload Files", file_count="multiple", elem_id = "multipleFiles")})
-                with gr.Tab(label="Microphone") as MicTab:
-                    inputDict.update({gr.Audio(source="microphone", type="filepath", label="Microphone Input", elem_id = "microphoneData")})
+                with gr.Tab(label="Microphone") as MicTab: # [Gradio 5.x] TypeError: Audio.__init__() got an unexpected keyword argument 'source'
+                    inputDict.update({gr.Audio(sources=["microphone"], type="filepath", label="Microphone Input", elem_id = "microphoneData")})
                 UrlTab.select(fn=lambda: "urlData", inputs = [], outputs= [sourceInput] )
                 UploadTab.select(fn=lambda: "multipleFiles", inputs = [], outputs= [sourceInput] )
                 MicTab.select(fn=lambda: "microphoneData", inputs = [], outputs= [sourceInput] )
@@ -1232,7 +1235,7 @@ def create_ui(app_config: ApplicationConfig):
             if readmeMd is not None:
                 with gr.Accordion("README.md", open=False):
                     gr.Markdown(readmeMd)
-
+
             inputDict.update({translateInput, sourceInput})
             submitBtn.click(fn=ui.transcribe_entry_progress if isQueueMode else ui.transcribe_entry,
                             inputs=inputDict, outputs=outputs)
@@ -1241,7 +1244,8 @@ def create_ui(app_config: ApplicationConfig):
 
     def create_translation(isQueueMode: bool):
        with gr.Blocks() as translation:
-            translateInput = gr.State(value="m2m100", elem_id="translateInput")
+            translateInput = gr.State(value="m2m100") # [Gradio 5.x] TypeError: State.__init__() got an unexpected keyword argument 'elem_id'
+            translateInput.elem_id = "translateInput"
             with gr.Row():
                 with gr.Column():
                     submitBtn = gr.Button("Submit", variant="primary")
@@ -1276,7 +1280,7 @@ def create_ui(app_config: ApplicationConfig):
                 llamaTab.select(fn=lambda: "Llama", inputs = [], outputs= [translateInput] )
             with gr.Column():
                 inputDict.update({
-                    gr.Dropdown(label="Input - Language", choices=sorted(get_lang_whisper_names()), value=app_config.language, elem_id="inputLangName"),
+                    gr.Dropdown(label="Input - Language", choices=sorted(get_lang_whisper_names()), value=app_config.language if app_config.language != None else [], elem_id="inputLangName"),
                     gr.Text(lines=5, label="Input - Text", elem_id="inputText", elem_classes="scroll-show"),
                 })
             with gr.Column():
@@ -1289,7 +1293,7 @@ def create_ui(app_config: ApplicationConfig):
             if translateModelMd is not None:
                 with gr.Accordion("docs/translateModel.md", open=False):
                     gr.Markdown(translateModelMd)
-
+
             inputDict.update({translateInput})
             submitBtn.click(fn=ui.translation_entry_progress if isQueueMode else ui.translation_entry,
                             inputs=inputDict, outputs=outputs)
@@ -1304,8 +1308,8 @@ def create_ui(app_config: ApplicationConfig):
     demo = gr.TabbedInterface([simpleTranscribe, fullTranscribe, uiTranslation], tab_names=["Simple", "Full", "Translation"], css=css)
 
     # Queue up the demo
-    if is_queue_mode:
-        demo.queue(concurrency_count=app_config.queue_concurrency_count)
+    if is_queue_mode: # [Gradio 5.x] TypeError: Blocks.queue() got an unexpected keyword argument 'concurrency_count'
+        demo.queue(default_concurrency_limit=app_config.queue_concurrency_count)
         print("Queue mode enabled (concurrency count: " + str(app_config.queue_concurrency_count) + ")")
     else:
         print("Queue mode disabled - progress bars will not be shown.")
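The app.py edits follow three recurring Gradio 4.x→5.x migration patterns: `Component.update()` is gone (construct or mutate component instances instead), `gr.Audio` takes `sources=[...]` instead of `source=...`, and `Blocks.queue()` takes `default_concurrency_limit` instead of `concurrency_count`. A minimal self-contained sketch of these idioms, separate from the project's code (component names here are illustrative only):

```python
import gradio as gr

def show_logs(logs: str):
    # Gradio 5.x: Textbox.update() no longer exists; return a component
    # instance carrying the desired properties instead.
    if logs:
        return gr.Textbox(value=logs, visible=True)
    return gr.Textbox(visible=False)

with gr.Blocks() as demo:
    # gr.State() no longer accepts elem_id, so the commit assigns it as a
    # plain attribute after construction.
    source_state = gr.State(value="urlData")
    source_state.elem_id = "sourceInput"

    # The old source="microphone" kwarg became sources=["microphone"].
    mic = gr.Audio(sources=["microphone"], type="filepath", label="Microphone Input")

    log_in = gr.Textbox(label="Filter logs")
    log_out = gr.Textbox(label="Logs", visible=False)
    log_in.submit(fn=show_logs, inputs=log_in, outputs=log_out)

# Blocks.queue() renamed concurrency_count to default_concurrency_limit.
demo.queue(default_concurrency_limit=1)

if __name__ == "__main__":
    demo.launch()
```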
config.json5 CHANGED
@@ -309,6 +309,12 @@
       "url": "avans06/Meta-Llama-3-8B-Instruct-ct2-int8_float16",
       "type": "huggingface",
       "tokenizer_url": "avans06/Meta-Llama-3-8B-Instruct-ct2-int8_float16"
+    },
+    {
+      "name": "Meta-Llama-3.2-3B-Instruct-ct2-int8/jncraton",
+      "url": "jncraton/Llama-3.2-3B-Instruct-ct2-int8",
+      "type": "huggingface",
+      "tokenizer_url": "jncraton/Llama-3.2-3B-Instruct-ct2-int8"
     }
   ]
 },
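For context, config.json5 is parsed with the json5 package pinned in the requirements files. A sketch of how the new entry could be read back, assuming hypothetical enclosing keys "models" and "Llama", which this hunk does not show:

```python
import json5

with open("config.json5", "r", encoding="utf-8") as f:
    config = json5.load(f)

# "models" and "Llama" are hypothetical key names for illustration; the
# hunk above shows only the list entries, not the enclosing structure.
for entry in config.get("models", {}).get("Llama", []):
    print(entry["name"], "->", entry["url"], "(" + entry["type"] + ")")
```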
requirements-fasterWhisper.txt CHANGED
@@ -1,8 +1,8 @@
-transformers
-ctranslate2>=4.
-faster-whisper>=1.0.
+transformers>=4.45.2
+ctranslate2>=4.4.0
+faster-whisper>=1.0.3
 ffmpeg-python==0.2.0
-gradio==
+gradio==5.0.2
 yt-dlp
 json5
 torch
requirements-whisper.txt CHANGED
@@ -1,12 +1,13 @@
-transformers
-ctranslate2>=4.
+transformers>=4.45.2
+ctranslate2>=4.4.0
 git+https://github.com/openai/whisper.git
 ffmpeg-python==0.2.0
-gradio==
+gradio==5.0.2
 yt-dlp
+json5
+torch
 torchaudio
 altair
-json5
 zhconv
 sentencepiece
 
requirements.txt CHANGED
@@ -1,8 +1,8 @@
-transformers
-ctranslate2>=4.
-faster-whisper>=1.0.
+transformers>=4.45.2
+ctranslate2>=4.4.0
+faster-whisper>=1.0.3
 ffmpeg-python==0.2.0
-gradio==
+gradio==5.0.2
 yt-dlp
 json5
 torch
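All three requirements variants pin `gradio==5.0.2` so a local install matches the Space's `sdk_version`; pick the file for the desired backend, e.g. `pip install -r requirements-fasterWhisper.txt`.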