bwconrad committed
Commit b7b6d2a · 1 Parent(s): f121c5e
Files changed (1):
  1. app.py +15 -5
app.py CHANGED

@@ -106,7 +106,7 @@ def run(
     ).properties(width=600)
     rule = alt.Chart().mark_rule(strokeDash=[6, 3], size=2).encode(y=alt.datum(thresh))
 
-    return matches[:30], lines + rule
+    return lines + rule, matches[:30]  # Only return up to 30 images to not crash the UI
 
 
 class LoadVideo(Dataset):
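
For context on this hunk: `lines` is an Altair line chart of per-frame similarity and `rule` draws the match threshold as a dashed horizontal line; `lines + rule` layers the two. A minimal self-contained sketch with made-up data (the real `run` derives the DataFrame and `thresh` from the video and query):

```python
import altair as alt
import pandas as pd

# Stand-in data: per-frame similarity scores over time (not the app's data).
df = pd.DataFrame({"Time": [0, 1, 2, 3, 4], "Similarity": [0.1, 0.4, 0.35, 0.6, 0.2]})
thresh = 0.3  # frames above this line count as matches

lines = (
    alt.Chart(df)
    .mark_line()
    .encode(x="Time", y="Similarity")
    .properties(width=600)
)
# Dashed horizontal rule at the threshold, layered over the line chart.
rule = alt.Chart().mark_rule(strokeDash=[6, 3], size=2).encode(y=alt.datum(thresh))
chart = lines + rule  # this layered chart is what `run` now returns first
```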
@@ -194,8 +194,13 @@ MODELS = {
 
 
 if __name__ == "__main__":
+    desc_text = """
+    Search the contents of a video with a text description. This application utilizes ConvNext CLIP models from [OpenCLIP](https://github.com/mlfoundations/open_clip) to compare video frames with the feature representation of a user text or image query. Code can be found at [this repo](https://github.com/bwconrad/video-content-search).
+
+    __Note__: Long videos (over a few minutes) may cause UI performance issues.
+    """
     text_app = gr.Interface(
-        description="Search the content's of a video with a text description.",
+        description=desc_text,
         fn=run,
         inputs=[
             gr.Video(label="Video"),
@@ -212,16 +217,21 @@ if __name__ == "__main__":
             gr.Checkbox(label="Center Crop"),
         ],
         outputs=[
+            gr.Plot(label="Similarity Plot"),
             gr.Gallery(label="Matched Frames").style(
                 columns=2, object_fit="contain", height="auto"
             ),
-            gr.Plot(label="Similarity Plot"),
         ],
         allow_flagging="never",
     )
 
+    desc_image = """
+    Search the contents of a video with an image query. This application utilizes ConvNext CLIP models from [OpenCLIP](https://github.com/mlfoundations/open_clip) to compare video frames with the feature representation of a user text or image query. Code can be found at [this repo](https://github.com/bwconrad/video-content-search).
+
+    __Note__: Long videos (over a few minutes) may cause UI performance issues.
+    """
     image_app = gr.Interface(
-        description="Search the content's of a video with an image query.",
+        description=desc_image,
         fn=run,
         inputs=[
             gr.Video(label="Video"),
@@ -238,10 +248,10 @@ if __name__ == "__main__":
             gr.Checkbox(label="Center Crop"),
         ],
         outputs=[
+            gr.Plot(label="Similarity Plot"),
             gr.Gallery(label="Matched Frames").style(
                 columns=2, object_fit="contain", height="auto"
             ),
-            gr.Plot(label="Similarity Plot"),
         ],
         allow_flagging="never",
     )
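
The return swap in the first hunk and the `outputs` reordering here go together: `gr.Interface` assigns the values returned by `fn` to the `outputs` components by position, so once `gr.Plot` precedes the gallery, the chart must be the first element of the returned tuple. A minimal sketch of that contract with a hypothetical two-output function (not the app's code):

```python
import gradio as gr

def demo(n: float):
    # The returned tuple is matched to `outputs` by position:
    # first value -> the gr.Number, second value -> the gr.Textbox.
    return n * 2, f"doubled {n}"

app = gr.Interface(
    fn=demo,
    inputs=gr.Number(label="n"),
    outputs=[gr.Number(label="2n"), gr.Textbox(label="Message")],
    allow_flagging="never",
)
# app.launch()
```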
 
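The new descriptions summarize the underlying pipeline: frames and the user query are embedded with an OpenCLIP ConvNeXt model and ranked by similarity. A hedged sketch of that comparison; the model name and pretrained tag below are assumptions, not necessarily the checkpoints the app loads:

```python
import open_clip
import torch

# Assumed model/pretrained tags, for illustration only.
model, _, preprocess = open_clip.create_model_and_transforms(
    "convnext_base_w", pretrained="laion2b_s13b_b82k"
)
tokenizer = open_clip.get_tokenizer("convnext_base_w")

frames = torch.rand(4, 3, 256, 256)  # stand-in for preprocessed video frames
with torch.no_grad():
    img_feats = model.encode_image(frames)
    txt_feats = model.encode_text(tokenizer(["a dog running on a beach"]))
sims = torch.cosine_similarity(img_feats, txt_feats)  # one score per frame
```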
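
The diff ends before the interfaces are launched. One plausible arrangement, assumed here rather than shown in this commit, serves both apps as tabs:

```python
# Assumption: not part of this diff; the actual launch code lives
# elsewhere in app.py.
app = gr.TabbedInterface(
    [text_app, image_app],
    tab_names=["Text Query", "Image Query"],
)
app.launch()
```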