mpc001 committed

Commit 7b95e93
1 Parent(s): 63e2ee7

Update app.py

Files changed (1):
  1. app.py +4 -17
app.py CHANGED
@@ -15,19 +15,18 @@ TITLE = """
15
  <h1 style="font-weight: 900; margin-bottom: 7px;">
16
  Auto-AVSR: Audio-Visual Speech Recognition with Automatic Labels
17
  </h1>
18
- <h3 style="font-weight: 450; font-size: 1rem; margin: 0rem">
19
- [<a href="https://arxiv.org/abs/2303.14307" style="color:blue;">arXiv</a>]
20
- [<a href="https://github.com/mpc001/auto_avsr" style="color:blue;">Code</a>]
21
- </h3>
22
  </div>
23
  <p style="margin-bottom: 10px; font-size: 94%">
24
- Want to recognise the content from audio or visual information?<br>The Auto-AVSR is here to get you answers!
25
  </p>
26
  </div>
27
  """
28
 
29
  ARTICLE = """
30
  <div style="text-align: center; max-width: 650px; margin: 0 auto;">
 
 
 
31
  <p>
32
  Server busy? You can also run on <a href="https://colab.research.google.com/drive/1jfb6e4xxhXHbmQf-nncdLno1u0b4j614?usp=sharing">Google Colab</a>
33
  </p>
@@ -73,32 +72,20 @@ pipelines = {
  print("Step 0. Model has been loaded.")

  def fn(pipeline_type, filename):
-     print("Step 0. Video has been uploaded.")
      selected_pipeline_instance = pipelines[pipeline_type]
-     print("Step 1. Video has been converted.")
      landmarks = selected_pipeline_instance.process_landmarks(filename, landmarks_filename=None)
-     print("Step 2. Landmarks have been detected.")
      data = selected_pipeline_instance.dataloader.load_data(filename, landmarks)
-     print("Step 3. Data has been preprocessed.")
      transcript = selected_pipeline_instance.model.infer(data)
-     print("Step 4. Inference has been done.")
-     print(f"transcript: {transcript}")
      return transcript

  demo = gr.Blocks(css=CSS)

  with demo:
-
      gr.HTML(TITLE)
-
-
      dropdown_list = gr.inputs.Dropdown(["ASR", "VSR(mediapipe)", "AVSR(mediapipe)"], label="model")
      video_file = gr.Video(label="INPUT VIDEO", include_audio=True)
      text = gr.Textbox(label="PREDICTION")
      btn = gr.Button("Submit").style(full_width=True)
-
      btn.click(fn, inputs=[dropdown_list, video_file], outputs=text)
-
      gr.HTML(ARTICLE)
-
  demo.launch()
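The trimmed fn() above chains three pipeline calls. For readers who want to replay them outside the Gradio UI, here is a minimal sketch; only process_landmarks(), dataloader.load_data(), and model.infer() come from the diff, while the InferencePipeline name, its import path, and its constructor argument are assumptions for illustration (the real pipelines = {...} dict sits outside this hunk).

from pipelines.pipeline import InferencePipeline  # assumed import path, not shown in this commit

pipeline = InferencePipeline("AVSR(mediapipe)")  # assumed constructor; mirrors a key of the pipelines dict

def transcribe(filename):
    # 1. Detect face landmarks for the clip (landmarks_filename=None means they are computed on the fly).
    landmarks = pipeline.process_landmarks(filename, landmarks_filename=None)
    # 2. Preprocess the audio/video streams into model-ready tensors.
    data = pipeline.dataloader.load_data(filename, landmarks)
    # 3. Run inference and return the transcript string.
    return pipeline.model.infer(data)

print(transcribe("example.mp4"))  # example.mp4 is a placeholder input video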
 
15
  <h1 style="font-weight: 900; margin-bottom: 7px;">
16
  Auto-AVSR: Audio-Visual Speech Recognition with Automatic Labels
17
  </h1>
 
 
 
 
18
  </div>
19
  <p style="margin-bottom: 10px; font-size: 94%">
20
+ Want to recognize content in a noisy environment?<br>Our Auto-AVSR models are here to transcribe your answers from audio or visual information!
21
  </p>
22
  </div>
23
  """
24
 
25
  ARTICLE = """
26
  <div style="text-align: center; max-width: 650px; margin: 0 auto;">
27
+ <p>
28
+ Want to look into models? You can find our [<a href="https://github.com/mpc001/auto_avsr">training code</a>] and [<a href="https://arxiv.org/abs/2303.14307">paper</a>].
29
+ </p>
30
  <p>
31
  Server busy? You can also run on <a href="https://colab.research.google.com/drive/1jfb6e4xxhXHbmQf-nncdLno1u0b4j614?usp=sharing">Google Colab</a>
32
  </p>
 
72
  print("Step 0. Model has been loaded.")
73
 
74
  def fn(pipeline_type, filename):
 
75
  selected_pipeline_instance = pipelines[pipeline_type]
 
76
  landmarks = selected_pipeline_instance.process_landmarks(filename, landmarks_filename=None)
 
77
  data = selected_pipeline_instance.dataloader.load_data(filename, landmarks)
 
78
  transcript = selected_pipeline_instance.model.infer(data)
 
 
79
  return transcript
80
 
81
  demo = gr.Blocks(css=CSS)
82
 
83
  with demo:
 
84
  gr.HTML(TITLE)
 
 
85
  dropdown_list = gr.inputs.Dropdown(["ASR", "VSR(mediapipe)", "AVSR(mediapipe)"], label="model")
86
  video_file = gr.Video(label="INPUT VIDEO", include_audio=True)
87
  text = gr.Textbox(label="PREDICTION")
88
  btn = gr.Button("Submit").style(full_width=True)
 
89
  btn.click(fn, inputs=[dropdown_list, video_file], outputs=text)
 
90
  gr.HTML(ARTICLE)
 
91
  demo.launch()
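As a usage note on the UI code kept by this commit: gr.inputs.Dropdown and Button.style(full_width=True) belong to the Gradio 3.x API and were removed in later releases. A minimal sketch of the same wiring against a recent Gradio version (an assumption, not part of this commit), reusing the CSS, TITLE, ARTICLE and fn defined in app.py, could look like this:

import gradio as gr

# CSS, TITLE, ARTICLE and fn are the objects defined in app.py above.
with gr.Blocks(css=CSS) as demo:
    gr.HTML(TITLE)
    dropdown_list = gr.Dropdown(["ASR", "VSR(mediapipe)", "AVSR(mediapipe)"], label="model")
    video_file = gr.Video(label="INPUT VIDEO")
    text = gr.Textbox(label="PREDICTION")
    btn = gr.Button("Submit")
    btn.click(fn, inputs=[dropdown_list, video_file], outputs=text)
    gr.HTML(ARTICLE)

demo.launch()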