NGUYEN, Xuan Phi committed on
Commit
43147aa
·
1 Parent(s): 2997e80
multipurpose_chatbot/demos/multimodal_chat_interface.py CHANGED
@@ -944,8 +944,8 @@ def vision_chat_response_stream_multiturn_engine(
944
  if num_tokens >= MODEL_ENGINE.max_position_embeddings - 128:
945
  raise gr.Error(f"Conversation or prompt is too long ({num_tokens} toks), please clear the chatbox or try shorter input.")
946
 
947
- print(f'{image_paths=}')
948
- print(full_prompt)
949
  outputs = None
950
  response = None
951
  num_tokens = -1
@@ -995,7 +995,7 @@ def doc_chat_response_stream_multiturn_engine(
995
  if num_tokens >= MODEL_ENGINE.max_position_embeddings - 128:
996
  raise gr.Error(f"Conversation or prompt is too long ({num_tokens} toks), please clear the chatbox or try shorter input.")
997
 
998
- print(full_prompt)
999
  outputs = None
1000
  response = None
1001
  num_tokens = -1
@@ -1050,8 +1050,8 @@ def vision_doc_chat_response_stream_multiturn_engine(
1050
  if num_tokens >= MODEL_ENGINE.max_position_embeddings - 128:
1051
  raise gr.Error(f"Conversation or prompt is too long ({num_tokens} toks), please clear the chatbox or try shorter input.")
1052
 
1053
- print(full_prompt)
1054
- print(f'{image_paths=}')
1055
  outputs = None
1056
  response = None
1057
  num_tokens = -1
 
944
  if num_tokens >= MODEL_ENGINE.max_position_embeddings - 128:
945
  raise gr.Error(f"Conversation or prompt is too long ({num_tokens} toks), please clear the chatbox or try shorter input.")
946
 
947
+ # print(f'{image_paths=}')
948
+ # print(full_prompt)
949
  outputs = None
950
  response = None
951
  num_tokens = -1
 
995
  if num_tokens >= MODEL_ENGINE.max_position_embeddings - 128:
996
  raise gr.Error(f"Conversation or prompt is too long ({num_tokens} toks), please clear the chatbox or try shorter input.")
997
 
998
+ # print(full_prompt)
999
  outputs = None
1000
  response = None
1001
  num_tokens = -1
 
1050
  if num_tokens >= MODEL_ENGINE.max_position_embeddings - 128:
1051
  raise gr.Error(f"Conversation or prompt is too long ({num_tokens} toks), please clear the chatbox or try shorter input.")
1052
 
1053
+ # print(full_prompt)
1054
+ # print(f'{image_paths=}')
1055
  outputs = None
1056
  response = None
1057
  num_tokens = -1
multipurpose_chatbot/engines/transformers_engine.py CHANGED
@@ -1,8 +1,13 @@
1
 
2
  try:
3
  import spaces
 
 
 
4
  except ModuleNotFoundError:
5
  print(f'Cannot import hf `spaces` with `import spaces`.')
 
 
6
  import os
7
  import numpy as np
8
  import argparse
@@ -541,7 +546,7 @@ class TransformersEngine(BaseEngine):
541
  if message_safety is not None:
542
  raise gr.Error(message_safety)
543
 
544
- @spaces.GPU
545
  def generate_yield_string(self, prompt, temperature, max_tokens, stop_strings: Optional[Tuple[str]] = None, **kwargs):
546
 
547
  # ! MUST PUT INSIDE torch.no_grad() otherwise it will overflow OOM
@@ -558,6 +563,12 @@ class TransformersEngine(BaseEngine):
558
 
559
  with torch.no_grad():
560
  inputs = self.tokenizer(prompt, return_tensors='pt')
 
 
 
 
 
 
561
  num_tokens = inputs.input_ids.size(1)
562
 
563
  inputs = inputs.to(self._model.device)
@@ -574,7 +585,7 @@ class TransformersEngine(BaseEngine):
574
  response = None
575
  for index, token in enumerate(generator):
576
  out_tokens.extend(token.tolist())
577
- response = self.tokenizer.decode(out_tokens)
578
  if "<|im_start|>assistant\n" in response:
579
  response = response.split("<|im_start|>assistant\n")[-1]
580
  num_tokens += 1
 
1
 
2
  try:
3
  import spaces
4
+ def maybe_spaces_gpu(fn):
5
+ fn = spaces.GPU(fn)
6
+ return fn
7
  except ModuleNotFoundError:
8
  print(f'Cannot import hf `spaces` with `import spaces`.')
9
+ def maybe_spaces_gpu(fn):
10
+ return fn
11
  import os
12
  import numpy as np
13
  import argparse
 
546
  if message_safety is not None:
547
  raise gr.Error(message_safety)
548
 
549
+ @maybe_spaces_gpu
550
  def generate_yield_string(self, prompt, temperature, max_tokens, stop_strings: Optional[Tuple[str]] = None, **kwargs):
551
 
552
  # ! MUST PUT INSIDE torch.no_grad() otherwise it will overflow OOM
 
563
 
564
  with torch.no_grad():
565
  inputs = self.tokenizer(prompt, return_tensors='pt')
566
+ # whether to print the full prompts
567
+ retok_full_prompt = self.tokenizer.decode(inputs.input_ids[0], skip_special_tokens=False)
568
+ print(f"retok_full_prompt:\n{retok_full_prompt}>>>>")
569
+ begin_bos = inputs.input_ids[0][0] == self.tokenizer.bos_token_id
570
+ print(f'begin_bos: {begin_bos}')
571
+
572
  num_tokens = inputs.input_ids.size(1)
573
 
574
  inputs = inputs.to(self._model.device)
 
585
  response = None
586
  for index, token in enumerate(generator):
587
  out_tokens.extend(token.tolist())
588
+ response = self.tokenizer.decode(out_tokens, skip_special_tokens=True)
589
  if "<|im_start|>assistant\n" in response:
590
  response = response.split("<|im_start|>assistant\n")[-1]
591
  num_tokens += 1