paralym committed on
Commit
2d88def
1 Parent(s): 31867a9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +17 -8
app.py CHANGED
@@ -320,6 +320,8 @@ def add_message(history, message):
320
  # return history, gr.MultimodalTextbox(value=None, interactive=False)
321
  # else:
322
  for x in message["files"]:
 
 
323
  history.append(((x,), None))
324
  if message["text"] is not None:
325
  history.append((message["text"], None))
@@ -408,6 +410,7 @@ def bot(history, temperature, top_p, max_output_tokens):
408
  with open(file_path, "rb") as src, open(filename, "wb") as dst:
409
  dst.write(src.read())
410
 
 
411
  if not is_video:
412
  image_tensor = [
413
  our_chatbot.image_processor.preprocess(f, return_tensors="pt")["pixel_values"][
@@ -418,14 +421,14 @@ def bot(history, temperature, top_p, max_output_tokens):
418
  for f in image_list
419
  ]
420
  image_tensor = torch.stack(image_tensor)
421
- else:
422
  image_tensor = our_chatbot.image_processor.preprocess(image_list, return_tensors="pt")["pixel_values"].half().to(our_chatbot.model.device)
423
-
424
-
425
- image_token = DEFAULT_IMAGE_TOKEN * num_new_images if not is_video else DEFAULT_IMAGE_TOKEN * num_new_images
426
 
427
  inp = text
428
- inp = image_token + "\n" + inp
429
  our_chatbot.conversation.append_message(our_chatbot.conversation.roles[0], inp)
430
  # image = None
431
  our_chatbot.conversation.append_message(our_chatbot.conversation.roles[1], None)
@@ -450,14 +453,20 @@ def bot(history, temperature, top_p, max_output_tokens):
450
  )
451
  print(our_chatbot.model.device)
452
  print(input_ids.device)
453
- print(image_tensor.device)
454
 
455
 
 
 
 
 
 
 
456
 
457
  generate_kwargs = dict(
458
  inputs=input_ids,
459
  streamer=streamer,
460
- images=[image_tensor] if is_video else image_tensor,
461
  do_sample=True,
462
  temperature=temperature,
463
  top_p=top_p,
@@ -610,7 +619,7 @@ with gr.Blocks(
610
  "files": [
611
  f"{cur_dir}/examples/realcase_math.jpg",
612
  ],
613
- "text": "Find the measure of angle 3.",
614
  }
615
  ],
616
  [
 
320
  # return history, gr.MultimodalTextbox(value=None, interactive=False)
321
  # else:
322
  for x in message["files"]:
323
+ if "realcase_video.jpg" in x:
324
+ x = x.replace("realcase_video.jpg", "realcase_video.mp4")
325
  history.append(((x,), None))
326
  if message["text"] is not None:
327
  history.append((message["text"], None))
 
410
  with open(file_path, "rb") as src, open(filename, "wb") as dst:
411
  dst.write(src.read())
412
 
413
+ image_tensor = []
414
  if not is_video:
415
  image_tensor = [
416
  our_chatbot.image_processor.preprocess(f, return_tensors="pt")["pixel_values"][
 
421
  for f in image_list
422
  ]
423
  image_tensor = torch.stack(image_tensor)
424
+ elif num_new_images > 0:
425
  image_tensor = our_chatbot.image_processor.preprocess(image_list, return_tensors="pt")["pixel_values"].half().to(our_chatbot.model.device)
426
+
427
+
428
+ image_token = DEFAULT_IMAGE_TOKEN * num_new_images + "\n"
429
 
430
  inp = text
431
+ inp = image_token + inp
432
  our_chatbot.conversation.append_message(our_chatbot.conversation.roles[0], inp)
433
  # image = None
434
  our_chatbot.conversation.append_message(our_chatbot.conversation.roles[1], None)
 
453
  )
454
  print(our_chatbot.model.device)
455
  print(input_ids.device)
456
+ # print(image_tensor.device)
457
 
458
 
459
+ if is_video:
460
+ input_image_tensor = [image_tensor]
461
+ elif num_new_images > 0:
462
+ input_image_tensor = image_tensor
463
+ else:
464
+ input_image_tensor = None
465
 
466
  generate_kwargs = dict(
467
  inputs=input_ids,
468
  streamer=streamer,
469
+ images=input_image_tensor,
470
  do_sample=True,
471
  temperature=temperature,
472
  top_p=top_p,
 
619
  "files": [
620
  f"{cur_dir}/examples/realcase_math.jpg",
621
  ],
622
+ "text": "Find the measure of angle 3. Please provide a step by step solution.",
623
  }
624
  ],
625
  [