mpc001 committed

Commit 7b95e93
1 Parent(s): 63e2ee7

Update app.py

Files changed (1):
  1. app.py +4 -17
app.py CHANGED
@@ -15,19 +15,18 @@ TITLE = """
15
  <h1 style="font-weight: 900; margin-bottom: 7px;">
16
  Auto-AVSR: Audio-Visual Speech Recognition with Automatic Labels
17
  </h1>
18
- <h3 style="font-weight: 450; font-size: 1rem; margin: 0rem">
19
- [<a href="https://arxiv.org/abs/2303.14307" style="color:blue;">arXiv</a>]
20
- [<a href="https://github.com/mpc001/auto_avsr" style="color:blue;">Code</a>]
21
- </h3>
22
  </div>
23
  <p style="margin-bottom: 10px; font-size: 94%">
24
- Want to recognise the content from audio or visual information?<br>The Auto-AVSR is here to get you answers!
25
  </p>
26
  </div>
27
  """
28
 
29
  ARTICLE = """
30
  <div style="text-align: center; max-width: 650px; margin: 0 auto;">
 
 
 
31
  <p>
32
  Server busy? You can also run on <a href="https://colab.research.google.com/drive/1jfb6e4xxhXHbmQf-nncdLno1u0b4j614?usp=sharing">Google Colab</a>
33
  </p>
@@ -73,32 +72,20 @@ pipelines = {
  print("Step 0. Model has been loaded.")

  def fn(pipeline_type, filename):
-     print("Step 0. Video has been uploaded.")
      selected_pipeline_instance = pipelines[pipeline_type]
-     print("Step 1. Video has been converted.")
      landmarks = selected_pipeline_instance.process_landmarks(filename, landmarks_filename=None)
-     print("Step 2. Landmarks have been detected.")
      data = selected_pipeline_instance.dataloader.load_data(filename, landmarks)
-     print("Step 3. Data has been preprocessed.")
      transcript = selected_pipeline_instance.model.infer(data)
-     print("Step 4. Inference has been done.")
-     print(f"transcript: {transcript}")
      return transcript

  demo = gr.Blocks(css=CSS)

  with demo:
-
      gr.HTML(TITLE)
-
-
      dropdown_list = gr.inputs.Dropdown(["ASR", "VSR(mediapipe)", "AVSR(mediapipe)"], label="model")
      video_file = gr.Video(label="INPUT VIDEO", include_audio=True)
      text = gr.Textbox(label="PREDICTION")
      btn = gr.Button("Submit").style(full_width=True)
-
      btn.click(fn, inputs=[dropdown_list, video_file], outputs=text)
-
      gr.HTML(ARTICLE)
-
  demo.launch()
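The trimmed fn() above chains three pipeline calls. For readers who want to replay them outside the Gradio UI, here is a minimal sketch; only process_landmarks(), dataloader.load_data(), and model.infer() come from the diff, while the InferencePipeline name, its import path, and its constructor argument are assumptions for illustration (the real pipelines = {...} dict sits outside this hunk).

from pipelines.pipeline import InferencePipeline  # assumed import path, not shown in this commit

pipeline = InferencePipeline("AVSR(mediapipe)")  # assumed constructor; mirrors a key of the pipelines dict

def transcribe(filename):
    # 1. Detect face landmarks for the clip (landmarks_filename=None means they are computed on the fly).
    landmarks = pipeline.process_landmarks(filename, landmarks_filename=None)
    # 2. Preprocess the audio/video streams into model-ready tensors.
    data = pipeline.dataloader.load_data(filename, landmarks)
    # 3. Run inference and return the transcript string.
    return pipeline.model.infer(data)

print(transcribe("example.mp4"))  # example.mp4 is a placeholder input video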
 
15
  <h1 style="font-weight: 900; margin-bottom: 7px;">
16
  Auto-AVSR: Audio-Visual Speech Recognition with Automatic Labels
17
  </h1>
 
 
 
 
18
  </div>
19
  <p style="margin-bottom: 10px; font-size: 94%">
20
+ Want to recognize content in a noisy environment?<br>Our Auto-AVSR models are here to transcribe your answers from audio or visual information!
21
  </p>
22
  </div>
23
  """
24
 
25
  ARTICLE = """
26
  <div style="text-align: center; max-width: 650px; margin: 0 auto;">
27
+ <p>
28
+ Want to look into models? You can find our [<a href="https://github.com/mpc001/auto_avsr">training code</a>] and [<a href="https://arxiv.org/abs/2303.14307">paper</a>].
29
+ </p>
30
  <p>
31
  Server busy? You can also run on <a href="https://colab.research.google.com/drive/1jfb6e4xxhXHbmQf-nncdLno1u0b4j614?usp=sharing">Google Colab</a>
32
  </p>
 
72
  print("Step 0. Model has been loaded.")
73
 
74
  def fn(pipeline_type, filename):
 
75
  selected_pipeline_instance = pipelines[pipeline_type]
 
76
  landmarks = selected_pipeline_instance.process_landmarks(filename, landmarks_filename=None)
 
77
  data = selected_pipeline_instance.dataloader.load_data(filename, landmarks)
 
78
  transcript = selected_pipeline_instance.model.infer(data)
 
 
79
  return transcript
80
 
81
  demo = gr.Blocks(css=CSS)
82
 
83
  with demo:
 
84
  gr.HTML(TITLE)
 
 
85
  dropdown_list = gr.inputs.Dropdown(["ASR", "VSR(mediapipe)", "AVSR(mediapipe)"], label="model")
86
  video_file = gr.Video(label="INPUT VIDEO", include_audio=True)
87
  text = gr.Textbox(label="PREDICTION")
88
  btn = gr.Button("Submit").style(full_width=True)
 
89
  btn.click(fn, inputs=[dropdown_list, video_file], outputs=text)
 
90
  gr.HTML(ARTICLE)
 
91
  demo.launch()
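As a usage note on the UI code kept by this commit: gr.inputs.Dropdown and Button.style(full_width=True) belong to the Gradio 3.x API and were removed in later releases. A minimal sketch of the same wiring against a recent Gradio version (an assumption, not part of this commit), reusing the CSS, TITLE, ARTICLE and fn defined in app.py, could look like this:

import gradio as gr

# CSS, TITLE, ARTICLE and fn are the objects defined in app.py above.
with gr.Blocks(css=CSS) as demo:
    gr.HTML(TITLE)
    dropdown_list = gr.Dropdown(["ASR", "VSR(mediapipe)", "AVSR(mediapipe)"], label="model")
    video_file = gr.Video(label="INPUT VIDEO")
    text = gr.Textbox(label="PREDICTION")
    btn = gr.Button("Submit")
    btn.click(fn, inputs=[dropdown_list, video_file], outputs=text)
    gr.HTML(ARTICLE)

demo.launch()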