Reuben Tan committed · Commit a3e4fa6 · 1 Parent(s): eed8ced

fix upload error

Files changed:
- app.py (+24 -11)
- global_local/models/video_instruction_ft_model.py (+1 -0)
app.py CHANGED

@@ -111,18 +111,30 @@ def gradio_reset(chat_state, img_list):
     img_list = []
     return None, gr.update(value=None, interactive=True), gr.update(value=None, interactive=True), gr.update(placeholder='Please upload your video first', interactive=False),gr.update(value="Upload & Start Chat", interactive=True), chat_state, img_list
 
-def upload_imgorvideo(gr_video, text_input, chat_state,chatbot):
+def upload_imgorvideo(gr_video, gr_img, text_input, chat_state,chatbot):
     if args.model_type == 'vicuna':
         chat_state = default_conversation.copy()
     else:
         chat_state = conv_llava_llama_2.copy()
-
-
-
-
-
-
-
+    if gr_img is None and gr_video is None:
+        return None, None, None, gr.update(interactive=True), chat_state, None
+    elif gr_img is not None and gr_video is None:
+        print(gr_img)
+        chatbot = chatbot + [((gr_img,), None)]
+        chat_state.system = "You are able to understand the visual content that the user provides. Follow the instructions carefully and explain your answers in detail."
+        img_list = []
+        llm_message = chat.upload_img(gr_img, chat_state, img_list)
+        return gr.update(interactive=False), gr.update(interactive=False), gr.update(interactive=True, placeholder='Type and press Enter'), gr.update(value="Start Chatting", interactive=False), chat_state, img_list,chatbot
+    elif gr_video is not None and gr_img is None:
+        print(gr_video)
+        chatbot = chatbot + [((gr_video,), None)]
+        chat_state.system = "You are able to understand the visual content that the user provides. Follow the instructions carefully and explain your answers in detail."
+        img_list = []
+        llm_message = chat.upload_video_without_audio(gr_video, chat_state, img_list)
+        return gr.update(interactive=False), gr.update(interactive=False), gr.update(interactive=True, placeholder='Type and press Enter'), gr.update(value="Start Chatting", interactive=False), chat_state, img_list,chatbot
+    else:
+        # img_list = []
+        return gr.update(interactive=False), gr.update(interactive=False, placeholder='Currently, only one input is supported'), gr.update(value="Currently, only one input is supported", interactive=False), chat_state, None,chatbot
 
 def gradio_ask(user_message, chatbot, chat_state):
     if len(user_message) == 0:

@@ -185,7 +197,7 @@ with gr.Blocks() as demo:
     with gr.Row():
         with gr.Column(scale=0.5):
             video = gr.Video()
-
+            image = gr.Image(type="filepath")
             #gr.Markdown(case_note_upload)
 
             upload_button = gr.Button(value="Upload & Start Chat", interactive=True, variant="primary")

@@ -226,8 +238,9 @@ with gr.Blocks() as demo:
         ], inputs=[video, text_input])'''
 
     gr.Markdown(cite_markdown)
-    upload_button.click(upload_imgorvideo, [video, text_input, chat_state,chatbot], [video, text_input, upload_button, chat_state, img_list,chatbot])
-
+    #upload_button.click(upload_imgorvideo, [video, text_input, chat_state,chatbot], [video, text_input, upload_button, chat_state, img_list,chatbot])
+    upload_button.click(upload_imgorvideo, [video, image, text_input, chat_state,chatbot], [video, image, text_input, upload_button, chat_state, img_list,chatbot])
+
     text_input.submit(gradio_ask, [text_input, chatbot, chat_state], [text_input, chatbot, chat_state]).then(
         gradio_answer, [chatbot, chat_state, img_list, num_beams, temperature], [chatbot, chat_state, img_list]
     )
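The reworked upload_imgorvideo accepts either a video or an image, but only one at a time: it echoes the chosen file into the chatbot as a chat turn, registers it with the chat session, and locks the inputs until the conversation is reset; if both or neither are supplied it refuses. Below is a minimal, self-contained sketch of that dispatch pattern using only plain Gradio; handle_upload and the stripped-down UI are illustrative stand-ins for the app's handler, which additionally threads chat_state and img_list through chat.upload_img / chat.upload_video_without_audio.

import gradio as gr

def handle_upload(video_path, image_path, chatbot):
    """Accept exactly one of video/image, echo it into the chat, and lock the inputs."""
    if video_path is None and image_path is None:
        # Nothing uploaded yet: leave everything editable.
        return gr.update(interactive=True), gr.update(interactive=True), \
               gr.update(value="Upload & Start Chat", interactive=True), chatbot
    if video_path is not None and image_path is not None:
        # Both supplied: refuse, mirroring the app's "only one input is supported" branch.
        return gr.update(interactive=False), gr.update(interactive=False), \
               gr.update(value="Currently, only one input is supported", interactive=False), chatbot
    media = video_path if video_path is not None else image_path
    chatbot = chatbot + [((media,), None)]  # show the uploaded file as a chat turn
    return gr.update(interactive=False), gr.update(interactive=False), \
           gr.update(value="Start Chatting", interactive=False), chatbot

with gr.Blocks() as demo:
    chatbot = gr.Chatbot()
    with gr.Row():
        video = gr.Video()
        image = gr.Image(type="filepath")
    upload_button = gr.Button(value="Upload & Start Chat", variant="primary")
    upload_button.click(handle_upload, [video, image, chatbot],
                        [video, image, upload_button, chatbot])

if __name__ == "__main__":
    demo.launch()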
global_local/models/video_instruction_ft_model.py CHANGED

@@ -127,6 +127,7 @@ class VideoInstructionFTLLAMA(Blip2Base):
 
         logging.info('Loading LLAMA Tokenizer')
         self.llama_tokenizer = LlamaTokenizer.from_pretrained(llama_model, use_fast=False, token=os.environ['LLAMA_TOKEN'])
+        #self.llama_tokenizer = LlamaTokenizer.from_pretrained(llama_model, use_fast=False)
         if self.llama_tokenizer.pad_token is None:
             self.llama_tokenizer.pad_token = self.llama_tokenizer.unk_token
         DEFAULT_IMAGE_PATCH_TOKEN = '<ImageHere>'
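The change in this file keeps the commented-out token-less call around while the live line reads a Hugging Face Hub token from the LLAMA_TOKEN environment variable, so the gated LLaMA tokenizer can be downloaded inside the Space. A standalone sketch of the same loading pattern, with the checkpoint name as a placeholder assumption:

import os
from transformers import LlamaTokenizer

# Placeholder checkpoint; the repo resolves llama_model from its own config.
llama_model = "meta-llama/Llama-2-7b-chat-hf"

# Read the Hub token from the environment, as the patched line does;
# os.environ.get returns None when unset, which from_pretrained treats as "no token".
tokenizer = LlamaTokenizer.from_pretrained(
    llama_model,
    use_fast=False,
    token=os.environ.get("LLAMA_TOKEN"),
)

# LLaMA tokenizers ship without a pad token, so fall back to the unk token for padding.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.unk_token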