yeliudev committed on
Commit
e9fc911
·
1 Parent(s): ed74388

Add example videos

Browse files
.gitattributes CHANGED
@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ *.mp4 filter=lfs diff=lfs merge=lfs -text
37
+ data filter=lfs diff=lfs merge=lfs -text
.gitignore CHANGED
@@ -5,6 +5,5 @@ __pycache__/
5
 
6
  # Temporary data
7
  /checkpoints
8
- /flagged
9
  .DS_Store
10
  ._*
 
5
 
6
  # Temporary data
7
  /checkpoints
 
8
  .DS_Store
9
  ._*
app.py CHANGED
@@ -1,24 +1,39 @@
1
  # Copyright (c) Ye Liu. Licensed under the BSD 3-Clause License.
2
 
 
3
  from functools import partial
4
 
5
  import clip
6
  import decord
7
- import nncore
8
- import torch
9
  import gradio as gr
 
10
  import numpy as np
11
- import pandas as pd
12
  import torchvision.transforms.functional as F
13
  from decord import VideoReader
14
  from nncore.engine import load_checkpoint
15
  from nncore.nn import build_model
16
 
17
- TITLE = '🌀R2-Tuning: Efficient Image-to-Video Transfer Learning for Video Temporal Grounding' # noqa
18
- DESCRIPTION = 'R2-Tuning is a parameter- and memory efficient transfer learning method for video temporal grounding. Please find more details in our <a href="https://arxiv.org/abs/2404.00801" target="_blank">Tech Report</a> and <a href="https://github.com/yeliudev/R2-Tuning" target="_blank">GitHub Repo</a>.\n\nUser Guide:\n1. Upload or record a video using web camera.\n2. Input a text query. A good practice is to write a sentence with 5~10 words.\n3. Click "submit" and you\'ll see the moment retrieval and highlight detection results on the right.' # noqa
 
 
 
 
 
19
 
20
  CONFIG = 'configs/qvhighlights/r2_tuning_qvhighlights.py'
21
- WEIGHT = 'https://huggingface.co/yeliudev/R2-Tuning/resolve/main/checkpoints/r2_tuning_qvhighlights-ed516355.pth' # noqa
 
 
 
 
 
 
 
 
 
 
22
 
23
 
24
  def convert_time(seconds):
@@ -88,22 +103,34 @@ def main(video, query, model, cfg):
88
 
89
  model, cfg = init_model(CONFIG, WEIGHT)
90
 
91
- demo = gr.Interface(
92
- fn=partial(main, model=model, cfg=cfg),
93
- inputs=[gr.Video(label='Video'),
94
- gr.Textbox(label='Text Query')],
95
- outputs=[
96
- gr.Dataframe(
97
- headers=['Start Time', 'End Time', 'Score'], label='Moment Retrieval'),
98
- gr.LinePlot(
99
- x='x',
100
- y='y',
101
- x_title='Time (seconds)',
102
- y_title='Saliency Score',
103
- label='Highlight Detection')
104
- ],
105
- allow_flagging='never',
106
- title=TITLE,
107
- description=DESCRIPTION)
 
 
 
 
 
 
 
 
 
 
 
 
108
 
109
  demo.launch()
 
1
  # Copyright (c) Ye Liu. Licensed under the BSD 3-Clause License.
2
 
3
+ import random
4
  from functools import partial
5
 
6
  import clip
7
  import decord
 
 
8
  import gradio as gr
9
+ import nncore
10
  import numpy as np
11
+ import torch
12
  import torchvision.transforms.functional as F
13
  from decord import VideoReader
14
  from nncore.engine import load_checkpoint
15
  from nncore.nn import build_model
16
 
17
+ import pandas as pd
18
+
19
+ TITLE = '🌀R2-Tuning: Efficient Image-to-Video Transfer Learning for Video Temporal Grounding'
20
+
21
+ TITLE_MD = '<h1 align="center">🌀R<sup>2</sup>-Tuning: Efficient Image-to-Video Transfer Learning for Video Temporal Grounding</h1>'
22
+ DESCRIPTION_MD = 'R<sup>2</sup>-Tuning is a parameter- and memory-efficient transfer learning method for video temporal grounding. Please find more details in our <a href="https://arxiv.org/abs/2404.00801" target="_blank">Tech Report</a> and <a href="https://github.com/yeliudev/R2-Tuning" target="_blank">GitHub Repo</a>.'
23
+ GUIDE_MD = '### User Guide:\n1. Upload a video or click "random" to sample one.\n2. Input a text query. A good practice is to write a sentence with 5~15 words.\n3. Click "submit" and you\'ll see the moment retrieval and highlight detection results on the right.'
24
 
25
  CONFIG = 'configs/qvhighlights/r2_tuning_qvhighlights.py'
26
+ WEIGHT = 'https://huggingface.co/yeliudev/R2-Tuning/resolve/main/checkpoints/r2_tuning_qvhighlights-ed516355.pth'
27
+
28
+ # yapf:disable
29
+ EXAMPLES = [
30
+ ('data/gTAvxnQtjXM_60.0_210.0.mp4', 'A man in a white t shirt wearing a backpack is showing a nearby cathedral.'),
31
+ ('data/pA6Z-qYhSNg_210.0_360.0.mp4', 'Different Facebook posts on transgender bathrooms are shown.'),
32
+ ('data/CkWOpyrAXdw_210.0_360.0.mp4', 'Indian girl cleaning her kitchen before cooking.'),
33
+ ('data/ocLUzCNodj4_360.0_510.0.mp4', 'A woman stands in her bedroom in front of a mirror and talks.'),
34
+ ('data/HkLfNhgP0TM_660.0_810.0.mp4', 'Woman lays down on the couch while talking to the camera.')
35
+ ]
36
+ # yapf:enable
37
 
38
 
39
  def convert_time(seconds):
 
103
 
104
  model, cfg = init_model(CONFIG, WEIGHT)
105
 
106
+ fn = partial(main, model=model, cfg=cfg)
107
+
108
+ with gr.Blocks(title=TITLE) as demo:
109
+ gr.Markdown(TITLE_MD)
110
+ gr.Markdown(DESCRIPTION_MD)
111
+ gr.Markdown(GUIDE_MD)
112
+
113
+ with gr.Row():
114
+ with gr.Column():
115
+ video = gr.Video(label='Video')
116
+ query = gr.Textbox(label='Text Query')
117
+
118
+ with gr.Row():
119
+ random_btn = gr.Button(value='🔮 Random')
120
+ gr.ClearButton([video, query], value='🗑️ Reset')
121
+ submit_btn = gr.Button(value='🚀 Submit')
122
+
123
+ with gr.Column():
124
+ mr = gr.DataFrame(
125
+ headers=['Start Time', 'End Time', 'Score'], label='Moment Retrieval')
126
+ hd = gr.LinePlot(
127
+ x='x',
128
+ y='y',
129
+ x_title='Time (seconds)',
130
+ y_title='Saliency Score',
131
+ label='Highlight Detection')
132
+
133
+ random_btn.click(lambda: random.sample(EXAMPLES, 1)[0], None, [video, query])
134
+ submit_btn.click(fn, [video, query], [mr, hd])
135
 
136
  demo.launch()
data/CkWOpyrAXdw_210.0_360.0.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c8a433f49ddeabe2ac5eae143e5129de8f2b6ae3838d286b94c838b0b01f9365
3
+ size 6004497
data/HkLfNhgP0TM_660.0_810.0.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5f9ec60b9b5c2f0d235465610f3680216c42c87ce777a6698a78f263711bde36
3
+ size 5166216
data/gTAvxnQtjXM_60.0_210.0.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:beedc2fd64f1c9da322a32b5246b2219726784abf92f0b0236bc8bb16ba5497b
3
+ size 7422854
data/ocLUzCNodj4_360.0_510.0.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:093de4b36ca46d8b410f01b0cebc1f36c05669f6cb3cb4b5514f7de0329fdceb
3
+ size 9791456
data/pA6Z-qYhSNg_210.0_360.0.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a34d5e47ebdb868ea24fac0d38d3cb063c16cf947a91eb77056cc389fc224421
3
+ size 6419206
setup.cfg CHANGED
@@ -12,4 +12,4 @@ no_lines_before = STDLIB,LOCALFOLDER
12
  default_section = FIRSTPARTY
13
 
14
  [flake8]
15
- max-line-length = 90
 
12
  default_section = FIRSTPARTY
13
 
14
  [flake8]
15
+ max-line-length = 500