Spaces: Running on Zero
wangjiawei.424 committed
Commit b60442d · 1 parent: d1b43bd

add examples
Files changed:
- .gitattributes +1 -0
- app.py +7 -3
- examples/test1.mp4 +3 -0
- examples/test2.mp4 +3 -0
- tools/conversation.py +1 -1
.gitattributes
CHANGED

@@ -33,4 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+examples/ filter=lfs diff=lfs merge=lfs -text
 *.mp4 filter=lfs diff=lfs merge=lfs -text
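The added rule is meant to route files under examples/ through Git LFS (the existing *.mp4 rule already covers the test videos). A quick way to verify which filter git assigns to a path is git check-attr; a minimal Python sketch, run from the repo root, using one of the files added below:

import subprocess

# Ask git which filter .gitattributes assigns to the path.
# With this commit applied, the expected output is:
#   examples/test1.mp4: filter: lfs
result = subprocess.run(
    ["git", "check-attr", "filter", "--", "examples/test1.mp4"],
    capture_output=True, text=True, check=True,
)
print(result.stdout.strip())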
app.py
CHANGED

@@ -93,7 +93,7 @@ def gradio_ask(user_message, chatbot, chat_state):
 
 @spaces.GPU(duration=120)
 def gradio_answer(chatbot, chat_state, img_file, img_list, top_p, temperature, n_frames=None):
-    llm_message, chat_state, img_list = chat.answer(conv=chat_state, visual_data_file=img_file, images=img_list, n_frames=n_frames, max_new_tokens=
+    llm_message, chat_state, img_list = chat.answer(conv=chat_state, visual_data_file=img_file, images=img_list, n_frames=n_frames, max_new_tokens=256, num_beams=1, temperature=temperature, top_p=top_p)
     chatbot[-1][1] = llm_message
     print(chat_state)
     print(f"Answer: {llm_message}")

@@ -195,7 +195,7 @@ with gr.Blocks(title="Tarsier",theme=gvlabtheme,css="#chatbot {overflow:auto; he
         num_frames = gr.Slider(
             minimum=4,
             maximum=16,
-            value=
+            value=16,
             step=2,
             interactive=True,
             label="#Frames",

@@ -212,7 +212,11 @@ with gr.Blocks(title="Tarsier",theme=gvlabtheme,css="#chatbot {overflow:auto; he
         with gr.Column(scale=0.15, min_width=0):
             run = gr.Button("💭Send")
         with gr.Column(scale=0.15, min_width=0):
-            clear = gr.Button("🔄Clear️")
+            clear = gr.Button("🔄Clear️")
+    gr.Examples(examples=[
+        [f"examples/test1.mp4", "Describe the video in detail."],
+        [f"examples/test2.mp4", "Are they having a pleasant conversation?"],
+    ], inputs=[up_video, text_input])
 
     chat = init_model()
     upload_button.click(upload_img, [up_image, up_video, up_gif, chat_state, num_frames], [up_image, up_video, up_gif, text_input, upload_button, chat_state, img_file, img_list])
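For context on the new gr.Examples block: clicking an example row pre-fills the listed input components, here up_video and text_input. A self-contained sketch of the same wiring, with an echo function standing in for the Space's chat pipeline (everything outside the examples list is illustrative, not from the Space):

import gradio as gr

def describe(video_path, prompt):
    # Stand-in for the Space's model call; just echoes its inputs.
    return f"Would run the model on {video_path!r} with prompt {prompt!r}"

with gr.Blocks() as demo:
    up_video = gr.Video(label="Video")
    text_input = gr.Textbox(label="Prompt")
    answer = gr.Textbox(label="Answer")
    run = gr.Button("Send")
    # Selecting a row populates up_video and text_input, mirroring how the
    # commit wires the two test clips into the existing inputs.
    gr.Examples(
        examples=[
            ["examples/test1.mp4", "Describe the video in detail."],
            ["examples/test2.mp4", "Are they having a pleasant conversation?"],
        ],
        inputs=[up_video, text_input],
    )
    run.click(describe, [up_video, text_input], answer)

demo.launch()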
examples/test1.mp4
ADDED

@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8306bc73af1fb02c298e952e51dda6bd583f34700e9f7f88aa3aa905ca94608f
+size 2283104
examples/test2.mp4
ADDED

@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f78d23a80429d6954ef3f36f08b8efb7a4de29985bf0bcca7ccb3f1bea9ae10b
+size 3040156
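Both mp4s are checked in as Git LFS pointer files: three key-value lines giving the spec version, the sha256 of the real blob, and its size in bytes; git-lfs swaps them for the actual videos on checkout. A minimal sketch of reading such a pointer (parse_lfs_pointer is a hypothetical helper, not part of this repo):

from pathlib import Path

def parse_lfs_pointer(path):
    # Split a Git LFS pointer file into its "key value" fields.
    fields = {}
    for line in Path(path).read_text().splitlines():
        key, _, value = line.partition(" ")
        fields[key] = value
    return fields

ptr = parse_lfs_pointer("examples/test1.mp4")  # before the LFS smudge filter runs
print(ptr["oid"])   # sha256:8306bc73...
print(ptr["size"])  # 2283104 (bytes of the real video)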
tools/conversation.py
CHANGED

@@ -107,7 +107,7 @@ class Chat:
         inputs = {k:v.to(self.device) for k,v in inputs.items() if v is not None}
         return inputs, conv, images
 
-    def answer(self, conv, visual_data_file=None, images=None, n_frames=None, max_new_tokens=
+    def answer(self, conv, visual_data_file=None, images=None, n_frames=None, max_new_tokens=256, num_beams=1, min_length=1, top_p=1.0,
                repetition_penalty=1.0, length_penalty=1, temperature=0):
         inputs, conv, images = self.prepare_model_inputs(conv, visual_data_file, images, n_frames)
         if self.model is not None:
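The filled-in defaults (max_new_tokens=256, num_beams=1, min_length=1, top_p=1.0, repetition_penalty=1.0, length_penalty=1, temperature=0) are standard Hugging Face generation parameters, and an answer() built on transformers would typically forward them to model.generate. A hedged sketch of that forwarding, with a small text-only model standing in for Tarsier (the model choice and setup are assumptions, not from this repo):

from transformers import AutoModelForCausalLM, AutoTokenizer

tok = AutoTokenizer.from_pretrained("gpt2")
model = AutoModelForCausalLM.from_pretrained("gpt2")

inputs = tok("Describe the video in detail.", return_tensors="pt")
output_ids = model.generate(
    **inputs,
    max_new_tokens=256,    # cap on newly generated tokens
    num_beams=1,           # no beam search
    min_length=1,
    top_p=1.0,
    repetition_penalty=1.0,
    length_penalty=1.0,
    do_sample=False,       # temperature=0 conventionally means greedy decoding
)
print(tok.decode(output_ids[0], skip_special_tokens=True))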