yeliudev committed
Commit e96a3aa · 1 Parent(s): 5124716
Files changed (1)
  1. app.py +8 -7
app.py CHANGED
@@ -15,14 +15,14 @@ from nncore.engine import load_checkpoint
 from nncore.nn import build_model
 
 TITLE = '🌀R2-Tuning: Efficient Image-to-Video Transfer Learning for Video Temporal Grounding'  # noqa
-DESCRIPTION = 'R2-Tuning is a parameter- and memory efficient transfer learning method for video temporal grounding. Please find more details in our <a href="https://arxiv.org/abs/2404.00801" target="_blank">Tech Report</a> and <a href="https://github.com/yeliudev/R2-Tuning" target="_blank">GitHub Repo</a>.\n\nUser Guide:\n1. Upload or record a video using web camera.\n2. Input a text query. A good practice is to use a sentence with 5~10 words.\n3. Click "submit" and you\'ll see the moment retrieval and highlight detection results on the right.'  # noqa
+DESCRIPTION = 'R2-Tuning is a parameter- and memory efficient transfer learning method for video temporal grounding. Please find more details in our <a href="https://arxiv.org/abs/2404.00801" target="_blank">Tech Report</a> and <a href="https://github.com/yeliudev/R2-Tuning" target="_blank">GitHub Repo</a>.\n\nUser Guide:\n1. Upload or record a video using web camera.\n2. Input a text query. A good practice is to write a sentence with 5~10 words.\n3. Click "submit" and you\'ll see the moment retrieval and highlight detection results on the right.'  # noqa
 
 CONFIG = 'configs/qvhighlights/r2_tuning_qvhighlights.py'
 WEIGHT = 'https://huggingface.co/yeliudev/R2-Tuning/resolve/main/checkpoints/r2_tuning_qvhighlights-ed516355.pth'  # noqa
 
 
 def convert_time(seconds):
-    minutes, seconds = divmod(round(seconds), 60)
+    minutes, seconds = divmod(round(max(seconds, 0)), 60)
     return f'{minutes:02d}:{seconds:02d}'
 
 
@@ -59,7 +59,7 @@ def init_model(config, checkpoint):
     return model, cfg
 
 
-def main(video, query, model, cfg):
+def main(video, query, model, cfg, fig, ax):
     if len(query) == 0:
         raise gr.Error('Text query can not be empty.')
 
@@ -82,23 +82,24 @@ def main(video, query, model, cfg):
     hd = pred['_out']['saliency'].cpu()
     hd = ((hd - hd.min()) / (hd.max() - hd.min())).tolist()
 
-    fig, ax = plt.subplots(figsize=(10, 5.5))
+    ax.cla()
     ax.plot(range(0, len(hd) * 2, 2), hd)
 
     ax.set_xlabel('Time (s)', fontsize=15)
     ax.set_ylabel('Saliency Score', fontsize=15)
 
     ax.tick_params(labelsize=14)
-    plt.tight_layout(rect=(0.02, 0.02, 0.95, 0.885))
 
     return mr, fig
 
 
 model, cfg = init_model(CONFIG, WEIGHT)
-main = partial(main, model=model, cfg=cfg)
+
+plt.tight_layout(rect=(0.02, 0.02, 0.95, 0.885))
+fig, ax = plt.subplots(figsize=(10, 5.5))
 
 demo = gr.Interface(
-    fn=main,
+    fn=partial(main, model=model, cfg=cfg, fig=fig, ax=ax),
     inputs=[gr.Video(label='Video'),
             gr.Textbox(label='Text Query')],
     outputs=[
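
In plain terms, the commit does two things. First, `convert_time` now clamps its input at zero, so a slightly negative predicted timestamp no longer renders oddly: in Python, `divmod(round(-1.2), 60)` is `(-1, 59)`, which the old code would have formatted as `-1:59`, whereas the clamp yields `00:00`. Second, the demo stops creating a fresh Matplotlib figure on every request; a single `fig`/`ax` pair is now built once at module level, bound into the Gradio handler with `functools.partial`, and cleared with `ax.cla()` before each new plot. Below is a minimal, self-contained sketch of that reuse pattern; the handler name, the dummy scores, and the single-textbox/`gr.Plot` interface are illustrative stand-ins rather than the actual app code.

```python
from functools import partial

import gradio as gr
import matplotlib.pyplot as plt


def plot_scores(query, fig, ax):
    # Clear the previous curve instead of allocating a new figure per request.
    ax.cla()
    # Hypothetical stand-in for the model's saliency output: one score per 2 s.
    scores = [0.1, 0.4, 0.9, 0.6, 0.2]
    ax.plot(range(0, len(scores) * 2, 2), scores)
    ax.set_xlabel('Time (s)')
    ax.set_ylabel('Saliency Score')
    ax.set_title(query)
    return fig


# Create the figure once at startup; every request reuses it.
fig, ax = plt.subplots(figsize=(10, 5.5))

demo = gr.Interface(
    fn=partial(plot_scores, fig=fig, ax=ax),
    inputs=gr.Textbox(label='Text Query'),
    outputs=gr.Plot(label='Saliency Score'),
)

if __name__ == '__main__':
    demo.launch()
```

Figures created through `pyplot` stay registered with the module until they are closed explicitly, so the old per-request `plt.subplots` call would let figures accumulate in a long-running Space; clearing one shared axes avoids that, with the usual caveat that a single shared figure should not be drawn on by concurrent requests.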