bofenghuang committed on
Commit
4503426
·
1 Parent(s): 79c3205
Files changed (1) hide show
  1. run_demo_multi_models.py +16 -9
run_demo_multi_models.py CHANGED
@@ -31,6 +31,7 @@ logger = logging.getLogger(__name__)
31
  logger.setLevel(logging.DEBUG)
32
 
33
  device = 0 if torch.cuda.is_available() else "cpu"
 
34
 
35
  cached_models = {}
36
 
@@ -38,6 +39,7 @@ def maybe_load_cached_pipeline(model_name):
38
  pipe = cached_models.get(model_name)
39
  if pipe is None:
40
  # load pipeline
 
41
  pipe = pipeline(
42
  task="automatic-speech-recognition",
43
  model=model_name,
@@ -105,11 +107,12 @@ demo = gr.Blocks()
105
  mf_transcribe = gr.Interface(
106
  fn=transcribe,
107
  inputs=[
108
- gr.inputs.Audio(source="microphone", type="filepath", optional=True),
109
- gr.inputs.Audio(source="upload", type="filepath", optional=True),
110
- gr.inputs.Dropdown(choices=MODEL_NAMES, default=DEFAULT_MODEL_NAME, label="Whisper Model"),
111
  ],
112
- outputs="text",
 
113
  layout="horizontal",
114
  theme="huggingface",
115
  title="Whisper Demo: Transcribe Audio",
@@ -120,10 +123,14 @@ mf_transcribe = gr.Interface(
120
  yt_transcribe = gr.Interface(
121
  fn=yt_transcribe,
122
  inputs=[
123
- gr.inputs.Textbox(lines=1, placeholder="Paste the URL to a YouTube video here", label="YouTube URL"),
124
- gr.inputs.Dropdown(choices=MODEL_NAMES, default=DEFAULT_MODEL_NAME, label="Whisper Model"),
 
 
 
 
 
125
  ],
126
- outputs=["html", "text"],
127
  layout="horizontal",
128
  theme="huggingface",
129
  title="Whisper Demo: Transcribe YouTube",
@@ -134,5 +141,5 @@ yt_transcribe = gr.Interface(
134
  with demo:
135
  gr.TabbedInterface([mf_transcribe, yt_transcribe], ["Transcribe Audio", "Transcribe YouTube"])
136
 
137
- # demo.launch(server_name="0.0.0.0", debug=True, share=True)
138
- demo.launch(enable_queue=True)
 
31
  logger.setLevel(logging.DEBUG)
32
 
33
  device = 0 if torch.cuda.is_available() else "cpu"
34
+ logger.info(f"Model will be loaded on device {device}")
35
 
36
  cached_models = {}
37
 
 
39
  pipe = cached_models.get(model_name)
40
  if pipe is None:
41
  # load pipeline
42
+ # todo: set decoding option for pipeline
43
  pipe = pipeline(
44
  task="automatic-speech-recognition",
45
  model=model_name,
 
107
  mf_transcribe = gr.Interface(
108
  fn=transcribe,
109
  inputs=[
110
+ gr.Audio(source="microphone", type="filepath", optional=True, label="Record"),
111
+ gr.Audio(source="upload", type="filepath", optional=True, label="Upload File"),
112
+ gr.Dropdown(choices=MODEL_NAMES, default=DEFAULT_MODEL_NAME, label="Whisper Model"),
113
  ],
114
+ # outputs="text",
115
+ outputs=gr.Textbox(label="Transcription"),
116
  layout="horizontal",
117
  theme="huggingface",
118
  title="Whisper Demo: Transcribe Audio",
 
123
  yt_transcribe = gr.Interface(
124
  fn=yt_transcribe,
125
  inputs=[
126
+ gr.Textbox(lines=1, placeholder="Paste the URL to a YouTube video here", label="YouTube URL"),
127
+ gr.Dropdown(choices=MODEL_NAMES, default=DEFAULT_MODEL_NAME, label="Whisper Model"),
128
+ ],
129
+ # outputs=["html", "text"],
130
+ outputs=[
131
+ gr.HTML(label="YouTube Page"),
132
+ gr.Textbox(label="Transcription"),
133
  ],
 
134
  layout="horizontal",
135
  theme="huggingface",
136
  title="Whisper Demo: Transcribe YouTube",
 
141
  with demo:
142
  gr.TabbedInterface([mf_transcribe, yt_transcribe], ["Transcribe Audio", "Transcribe YouTube"])
143
 
144
+ demo.launch(server_name="0.0.0.0", debug=True, share=True)
145
+ # demo.launch(enable_queue=True)