wangjiawei.424 committed
Commit b60442d
1 Parent(s): d1b43bd

add examples

.gitattributes CHANGED
@@ -33,4 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+examples/ filter=lfs diff=lfs merge=lfs -text
 *.mp4 filter=lfs diff=lfs merge=lfs -text
app.py CHANGED
@@ -93,7 +93,7 @@ def gradio_ask(user_message, chatbot, chat_state):
 
 @spaces.GPU(duration=120)
 def gradio_answer(chatbot, chat_state, img_file, img_list, top_p, temperature, n_frames=None):
-    llm_message, chat_state, img_list = chat.answer(conv=chat_state, visual_data_file=img_file, images=img_list, n_frames=n_frames, max_new_tokens=512, num_beams=1, temperature=temperature, top_p=top_p)
+    llm_message, chat_state, img_list = chat.answer(conv=chat_state, visual_data_file=img_file, images=img_list, n_frames=n_frames, max_new_tokens=256, num_beams=1, temperature=temperature, top_p=top_p)
     chatbot[-1][1] = llm_message
     print(chat_state)
     print(f"Answer: {llm_message}")
@@ -195,7 +195,7 @@ with gr.Blocks(title="Tarsier",theme=gvlabtheme,css="#chatbot {overflow:auto; he
     num_frames = gr.Slider(
         minimum=4,
         maximum=16,
-        value=8,
+        value=16,
         step=2,
         interactive=True,
         label="#Frames",
@@ -212,7 +212,11 @@ with gr.Blocks(title="Tarsier",theme=gvlabtheme,css="#chatbot {overflow:auto; he
     with gr.Column(scale=0.15, min_width=0):
         run = gr.Button("💭Send")
     with gr.Column(scale=0.15, min_width=0):
-        clear = gr.Button("🔄Clear️")
+        clear = gr.Button("🔄Clear️")
+    gr.Examples(examples=[
+        [f"examples/test1.mp4", "Describe the video in detail."],
+        [f"examples/test2.mp4", "Are they having a pleasant conversation?"],
+    ], inputs=[up_video, text_input])
 
 chat = init_model()
 upload_button.click(upload_img, [up_image, up_video, up_gif, chat_state, num_frames], [up_image, up_video, up_gif, text_input, upload_button, chat_state, img_file, img_list])
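
Note on the change above: `gr.Examples` rows are click-to-fill presets; selecting a row writes the row's values positionally into the listed input components. A minimal, self-contained sketch of the same pattern (only the two components the example rows feed are reproduced; the rest of the Tarsier UI is omitted):

```python
# Minimal sketch of the gr.Examples pattern added above; only the two
# components the example rows feed are reproduced here.
import gradio as gr

with gr.Blocks() as demo:
    up_video = gr.Video(label="Upload video")    # receives the example .mp4 path
    text_input = gr.Textbox(label="Prompt")      # receives the example question
    # Each row is one clickable preset; values map positionally onto `inputs`.
    gr.Examples(
        examples=[
            ["examples/test1.mp4", "Describe the video in detail."],
            ["examples/test2.mp4", "Are they having a pleasant conversation?"],
        ],
        inputs=[up_video, text_input],
    )

if __name__ == "__main__":
    demo.launch()
```

Because `up_video` receives a file path, the example videos must exist in the Space under `examples/`, which is what the two LFS-tracked files below provide.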
examples/test1.mp4 ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8306bc73af1fb02c298e952e51dda6bd583f34700e9f7f88aa3aa905ca94608f
+size 2283104
examples/test2.mp4 ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f78d23a80429d6954ef3f36f08b8efb7a4de29985bf0bcca7ccb3f1bea9ae10b
+size 3040156
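
Both videos are committed as Git LFS pointer files, which is what the new `examples/ filter=lfs diff=lfs merge=lfs -text` rule in `.gitattributes` arranges: the repository stores only a spec version, a sha256 object id, and the byte size, while the actual bytes live in LFS storage. A minimal sketch (a hypothetical helper, not part of this repo) that reads such a pointer:

```python
# Hypothetical helper (not in this repo): parse a Git LFS pointer file,
# e.g. the three-line examples/test1.mp4 committed above.
from typing import Dict


def parse_lfs_pointer(path: str) -> Dict[str, str]:
    fields = {}
    with open(path, encoding="utf-8") as f:
        for line in f:
            key, _, value = line.strip().partition(" ")
            fields[key] = value
    return fields


# parse_lfs_pointer("examples/test1.mp4") would yield:
# {"version": "https://git-lfs.github.com/spec/v1",
#  "oid": "sha256:8306bc73af1fb02c298e952e51dda6bd583f34700e9f7f88aa3aa905ca94608f",
#  "size": "2283104"}   # size is the byte count of the real file
```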
tools/conversation.py CHANGED
@@ -107,7 +107,7 @@ class Chat:
         inputs = {k:v.to(self.device) for k,v in inputs.items() if v is not None}
         return inputs, conv, images
 
-    def answer(self, conv, visual_data_file=None, images=None, n_frames=None, max_new_tokens=512, num_beams=1, min_length=1, top_p=1.0,
+    def answer(self, conv, visual_data_file=None, images=None, n_frames=None, max_new_tokens=256, num_beams=1, min_length=1, top_p=1.0,
                repetition_penalty=1.0, length_penalty=1, temperature=0):
         inputs, conv, images = self.prepare_model_inputs(conv, visual_data_file, images, n_frames)
         if self.model is not None:
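
The default `max_new_tokens` drops from 512 to 256 here, matching the hard-coded call in `app.py` above. The keyword arguments of `Chat.answer` mirror Hugging Face `transformers` generation parameters, so presumably they are forwarded to the model's `generate` call. A hedged sketch under that assumption (the function and its wiring are illustrative, not the repo's actual code):

```python
# Hedged sketch: assumes Chat.answer forwards its kwargs to transformers'
# GenerationMixin.generate. This is an illustration, not the repo's code.
from typing import Dict

import torch
from transformers import PreTrainedModel


def generate_reply(model: PreTrainedModel, inputs: Dict[str, torch.Tensor]) -> torch.Tensor:
    return model.generate(
        **inputs,                # tokenized prompt + visual tensors from prepare_model_inputs
        max_new_tokens=256,      # default lowered from 512 by this commit
        num_beams=1,
        min_length=1,
        top_p=1.0,
        repetition_penalty=1.0,
        length_penalty=1.0,
        temperature=0.0,
    )
```

If `do_sample` is left at its `transformers` default of `False`, `temperature` and `top_p` are ignored, so with `num_beams=1` decoding is effectively greedy.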