Fix plot
Browse files
app.py
CHANGED
@@ -15,14 +15,14 @@ from nncore.engine import load_checkpoint
|
|
15 |
from nncore.nn import build_model
|
16 |
|
17 |
TITLE = '๐R2-Tuning: Efficient Image-to-Video Transfer Learning for Video Temporal Grounding' # noqa
|
18 |
-
DESCRIPTION = 'R2-Tuning is a parameter- and memory efficient transfer learning method for video temporal grounding. Please find more details in our <a href="https://arxiv.org/abs/2404.00801" target="_blank">Tech Report</a> and <a href="https://github.com/yeliudev/R2-Tuning" target="_blank">GitHub Repo</a>.\n\nUser Guide:\n1. Upload or record a video using web camera.\n2. Input a text query. A good practice is to
|
19 |
|
20 |
CONFIG = 'configs/qvhighlights/r2_tuning_qvhighlights.py'
|
21 |
WEIGHT = 'https://huggingface.co/yeliudev/R2-Tuning/resolve/main/checkpoints/r2_tuning_qvhighlights-ed516355.pth' # noqa
|
22 |
|
23 |
|
24 |
def convert_time(seconds):
|
25 |
-
minutes, seconds = divmod(round(seconds), 60)
|
26 |
return f'{minutes:02d}:{seconds:02d}'
|
27 |
|
28 |
|
@@ -59,7 +59,7 @@ def init_model(config, checkpoint):
|
|
59 |
return model, cfg
|
60 |
|
61 |
|
62 |
-
def main(video, query, model, cfg):
|
63 |
if len(query) == 0:
|
64 |
raise gr.Error('Text query can not be empty.')
|
65 |
|
@@ -82,23 +82,24 @@ def main(video, query, model, cfg):
|
|
82 |
hd = pred['_out']['saliency'].cpu()
|
83 |
hd = ((hd - hd.min()) / (hd.max() - hd.min())).tolist()
|
84 |
|
85 |
-
|
86 |
ax.plot(range(0, len(hd) * 2, 2), hd)
|
87 |
|
88 |
ax.set_xlabel('Time (s)', fontsize=15)
|
89 |
ax.set_ylabel('Saliency Score', fontsize=15)
|
90 |
|
91 |
ax.tick_params(labelsize=14)
|
92 |
-
plt.tight_layout(rect=(0.02, 0.02, 0.95, 0.885))
|
93 |
|
94 |
return mr, fig
|
95 |
|
96 |
|
97 |
model, cfg = init_model(CONFIG, WEIGHT)
|
98 |
-
|
|
|
|
|
99 |
|
100 |
demo = gr.Interface(
|
101 |
-
fn=main,
|
102 |
inputs=[gr.Video(label='Video'),
|
103 |
gr.Textbox(label='Text Query')],
|
104 |
outputs=[
|
|
|
15 |
from nncore.nn import build_model
|
16 |
|
17 |
TITLE = '๐R2-Tuning: Efficient Image-to-Video Transfer Learning for Video Temporal Grounding' # noqa
|
18 |
+
DESCRIPTION = 'R2-Tuning is a parameter- and memory efficient transfer learning method for video temporal grounding. Please find more details in our <a href="https://arxiv.org/abs/2404.00801" target="_blank">Tech Report</a> and <a href="https://github.com/yeliudev/R2-Tuning" target="_blank">GitHub Repo</a>.\n\nUser Guide:\n1. Upload or record a video using web camera.\n2. Input a text query. A good practice is to write a sentence with 5~10 words.\n3. Click "submit" and you\'ll see the moment retrieval and highlight detection results on the right.' # noqa
|
19 |
|
20 |
CONFIG = 'configs/qvhighlights/r2_tuning_qvhighlights.py'
|
21 |
WEIGHT = 'https://huggingface.co/yeliudev/R2-Tuning/resolve/main/checkpoints/r2_tuning_qvhighlights-ed516355.pth' # noqa
|
22 |
|
23 |
|
24 |
def convert_time(seconds):
|
25 |
+
minutes, seconds = divmod(round(max(seconds, 0)), 60)
|
26 |
return f'{minutes:02d}:{seconds:02d}'
|
27 |
|
28 |
|
|
|
59 |
return model, cfg
|
60 |
|
61 |
|
62 |
+
def main(video, query, model, cfg, fig, ax):
|
63 |
if len(query) == 0:
|
64 |
raise gr.Error('Text query can not be empty.')
|
65 |
|
|
|
82 |
hd = pred['_out']['saliency'].cpu()
|
83 |
hd = ((hd - hd.min()) / (hd.max() - hd.min())).tolist()
|
84 |
|
85 |
+
ax.cla()
|
86 |
ax.plot(range(0, len(hd) * 2, 2), hd)
|
87 |
|
88 |
ax.set_xlabel('Time (s)', fontsize=15)
|
89 |
ax.set_ylabel('Saliency Score', fontsize=15)
|
90 |
|
91 |
ax.tick_params(labelsize=14)
|
|
|
92 |
|
93 |
return mr, fig
|
94 |
|
95 |
|
96 |
model, cfg = init_model(CONFIG, WEIGHT)
|
97 |
+
|
98 |
+
plt.tight_layout(rect=(0.02, 0.02, 0.95, 0.885))
|
99 |
+
fig, ax = plt.subplots(figsize=(10, 5.5))
|
100 |
|
101 |
demo = gr.Interface(
|
102 |
+
fn=partial(main, model=model, cfg=cfg, fig=fig, ax=ax),
|
103 |
inputs=[gr.Video(label='Video'),
|
104 |
gr.Textbox(label='Text Query')],
|
105 |
outputs=[
|