yeliudev committed
Commit e96a3aa · 1 Parent(s): 5124716
Files changed (1)
  1. app.py +8 -7
app.py CHANGED
@@ -15,14 +15,14 @@ from nncore.engine import load_checkpoint
 from nncore.nn import build_model
 
 TITLE = '🌀R2-Tuning: Efficient Image-to-Video Transfer Learning for Video Temporal Grounding'  # noqa
-DESCRIPTION = 'R2-Tuning is a parameter- and memory efficient transfer learning method for video temporal grounding. Please find more details in our <a href="https://arxiv.org/abs/2404.00801" target="_blank">Tech Report</a> and <a href="https://github.com/yeliudev/R2-Tuning" target="_blank">GitHub Repo</a>.\n\nUser Guide:\n1. Upload or record a video using web camera.\n2. Input a text query. A good practice is to use a sentence with 5~10 words.\n3. Click "submit" and you\'ll see the moment retrieval and highlight detection results on the right.'  # noqa
+DESCRIPTION = 'R2-Tuning is a parameter- and memory efficient transfer learning method for video temporal grounding. Please find more details in our <a href="https://arxiv.org/abs/2404.00801" target="_blank">Tech Report</a> and <a href="https://github.com/yeliudev/R2-Tuning" target="_blank">GitHub Repo</a>.\n\nUser Guide:\n1. Upload or record a video using web camera.\n2. Input a text query. A good practice is to write a sentence with 5~10 words.\n3. Click "submit" and you\'ll see the moment retrieval and highlight detection results on the right.'  # noqa
 
 CONFIG = 'configs/qvhighlights/r2_tuning_qvhighlights.py'
 WEIGHT = 'https://huggingface.co/yeliudev/R2-Tuning/resolve/main/checkpoints/r2_tuning_qvhighlights-ed516355.pth'  # noqa
 
 
 def convert_time(seconds):
-    minutes, seconds = divmod(round(seconds), 60)
+    minutes, seconds = divmod(round(max(seconds, 0)), 60)
     return f'{minutes:02d}:{seconds:02d}'
 
 
@@ -59,7 +59,7 @@ def init_model(config, checkpoint):
     return model, cfg
 
 
-def main(video, query, model, cfg):
+def main(video, query, model, cfg, fig, ax):
     if len(query) == 0:
         raise gr.Error('Text query can not be empty.')
 
@@ -82,23 +82,24 @@ def main(video, query, model, cfg):
     hd = pred['_out']['saliency'].cpu()
     hd = ((hd - hd.min()) / (hd.max() - hd.min())).tolist()
 
-    fig, ax = plt.subplots(figsize=(10, 5.5))
+    ax.cla()
     ax.plot(range(0, len(hd) * 2, 2), hd)
 
     ax.set_xlabel('Time (s)', fontsize=15)
     ax.set_ylabel('Saliency Score', fontsize=15)
 
     ax.tick_params(labelsize=14)
-    plt.tight_layout(rect=(0.02, 0.02, 0.95, 0.885))
 
     return mr, fig
 
 
 model, cfg = init_model(CONFIG, WEIGHT)
-main = partial(main, model=model, cfg=cfg)
+
+plt.tight_layout(rect=(0.02, 0.02, 0.95, 0.885))
+fig, ax = plt.subplots(figsize=(10, 5.5))
 
 demo = gr.Interface(
-    fn=main,
+    fn=partial(main, model=model, cfg=cfg, fig=fig, ax=ax),
     inputs=[gr.Video(label='Video'),
             gr.Textbox(label='Text Query')],
     outputs=[
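
In plain terms, the commit does two things. First, `convert_time` now clamps its input at zero, so a slightly negative predicted timestamp no longer renders oddly: in Python, `divmod(round(-1.2), 60)` is `(-1, 59)`, which the old code would have formatted as `-1:59`, whereas the clamp yields `00:00`. Second, the demo stops creating a fresh Matplotlib figure on every request; a single `fig`/`ax` pair is now built once at module level, bound into the Gradio handler with `functools.partial`, and cleared with `ax.cla()` before each new plot. Below is a minimal, self-contained sketch of that reuse pattern; the handler name, the dummy scores, and the single-textbox/`gr.Plot` interface are illustrative stand-ins rather than the actual app code.

```python
from functools import partial

import gradio as gr
import matplotlib.pyplot as plt


def plot_scores(query, fig, ax):
    # Clear the previous curve instead of allocating a new figure per request.
    ax.cla()
    # Hypothetical stand-in for the model's saliency output: one score per 2 s.
    scores = [0.1, 0.4, 0.9, 0.6, 0.2]
    ax.plot(range(0, len(scores) * 2, 2), scores)
    ax.set_xlabel('Time (s)')
    ax.set_ylabel('Saliency Score')
    ax.set_title(query)
    return fig


# Create the figure once at startup; every request reuses it.
fig, ax = plt.subplots(figsize=(10, 5.5))

demo = gr.Interface(
    fn=partial(plot_scores, fig=fig, ax=ax),
    inputs=gr.Textbox(label='Text Query'),
    outputs=gr.Plot(label='Saliency Score'),
)

if __name__ == '__main__':
    demo.launch()
```

Figures created through `pyplot` stay registered with the module until they are closed explicitly, so the old per-request `plt.subplots` call would let figures accumulate in a long-running Space; clearing one shared axes avoids that, with the usual caveat that a single shared figure should not be drawn on by concurrent requests.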