vilarin committed
Commit 09399fd
1 Parent(s): 95dadc7

Update app.py

Files changed (1)
  1. app.py +21 -73
app.py CHANGED
@@ -3,7 +3,7 @@ import torch
 from PIL import Image
 import gradio as gr
 import spaces
-from transformers import AutoModelForCausalLM, AutoTokenizer
+from transformers import AutoModelForCausalLM, AutoProcessor, TextIteratorStreamer
 import os
 import time
 from huggingface_hub import hf_hub_download
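The import swap tracks the two functional changes in this commit: `AutoProcessor` replaces the hand-wired `AutoTokenizer` plus vision tower setup, and `TextIteratorStreamer` enables real incremental decoding. The rewritten `stream_chat` below also calls `Thread`, yet no threading import appears in any hunk shown here; a minimal sketch of the import set the new code depends on, with that import added as an assumption:

```python
# Imports the rewritten app.py relies on. The threading import is an
# assumption: stream_chat calls Thread(...) but no hunk in this diff adds it,
# so it must already exist elsewhere in the file (or needs to be added).
from threading import Thread

import torch
import gradio as gr
from PIL import Image
from transformers import AutoModelForCausalLM, AutoProcessor, TextIteratorStreamer
```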
@@ -18,7 +18,7 @@ MODEL_NAME = MODEL_ID.split("/")[-1]
 
 TITLE = "<h1><center>VL-Chatbox</center></h1>"
 
-DESCRIPTION = "<h3><center>MODEL LOADED: " + MODEL_NAME + "</center></h3>"
+DESCRIPTION = "<h3><center>MODEL: " + MODEL_NAME + "</center></h3>"
 
 DEFAULT_SYSTEM = "You named Chatbox. You are a good assitant."
 
@@ -31,54 +31,15 @@ CSS = """
 }
 """
 
-filenames = [
-    "config.json",
-    "generation_config.json",
-    "model-00001-of-00004.safetensors",
-    "model-00002-of-00004.safetensors",
-    "model-00003-of-00004.safetensors",
-    "model-00004-of-00004.safetensors",
-    "model.safetensors.index.json",
-    "special_tokens_map.json",
-    "tokenizer.json",
-    "tokenizer_config.json"
-]
-
-for filename in filenames:
-    downloaded_model_path = hf_hub_download(
-        repo_id=MODEL_ID,
-        filename=filename,
-        local_dir="./model/"
-    )
-
-for items in os.listdir("./model"):
-    print(items)
-
-# def no_logger():
-#     logging.config.dictConfig({
-#         'version': 1,
-#         'disable_existing_loggers': True,
-#     })
-
-
 model = AutoModelForCausalLM.from_pretrained(
-    "./model/",
+    MODEL_ID,
     torch_dtype=torch.float16,
     low_cpu_mem_usage=True,
     trust_remote_code=True
 ).to(0)
-tokenizer = AutoTokenizer.from_pretrained("./model/",trust_remote_code=True)
-vision_tower = model.get_vision_tower()
-vision_tower.load_model()
-vision_tower.to(device="cuda", dtype=torch.float16)
-image_processor = vision_tower.image_processor
-tokenizer.pad_token = tokenizer.eos_token
+processor = AutoProcessor.from_pretrained(MODEL_ID, trust_remote_code=True)
 
-# Define terminators (if applicable, adjust as needed)
-terminators = [
-    tokenizer.eos_token_id,
-    tokenizer.convert_tokens_to_ids("<|eot_id|>")
-]
+eos_token_id = processor.tokenizer.eos_token_id
 
 
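Dropping the per-file download loop is safe because `from_pretrained` already does the equivalent work: given a repo ID it resolves, downloads, and caches the config, the safetensors shards, and the tokenizer files on first use. A sketch of the before/after, with a hypothetical repo ID standing in for the Space's `MODEL_ID`:

```python
from huggingface_hub import snapshot_download
from transformers import AutoModelForCausalLM

MODEL_ID = "some-org/some-vlm"  # hypothetical stand-in for the Space's MODEL_ID

# Before (roughly): mirror the repo into a local directory, then load from the
# path. snapshot_download fetches every file in one call, so even this is
# simpler than the deleted hand-maintained filename list.
local_path = snapshot_download(repo_id=MODEL_ID, local_dir="./model/")
model = AutoModelForCausalLM.from_pretrained(local_path, trust_remote_code=True)

# After: pass the repo ID directly; files land in the shared Hub cache
# (~/.cache/huggingface by default) and are reused across restarts.
model = AutoModelForCausalLM.from_pretrained(MODEL_ID, trust_remote_code=True)
```

This also leaves the `hf_hub_download` import above unused, making it a candidate for removal.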
 
@@ -88,49 +49,36 @@ def stream_chat(message, history: list, system: str, temperature: float, max_new
     print(message)
     conversation = [{"role": "system", "content": system or DEFAULT_SYSTEM}]
     for prompt, answer in history:
-        conversation.extend([{"role": "user", "content": prompt}, {"role": "assistant", "content": answer}])
-
+        conversation.extend([{"role": "user", "content": f"<|image_1|>\n{prompt}"}, {"role": "assistant", "content": answer}])
     conversation.append({"role": "user", "content": message['text']})
+
     if message["files"]:
         image = Image.open(message["files"][0]).convert('RGB')
-        # Process the conversation text
-        inputs = model.build_conversation_input_ids(
-            tokenizer,
-            query=message['text'],
-            image=image,
-            image_processor=image_processor,
-        )
-        input_ids = inputs["input_ids"].to(device='cuda', non_blocking=True)
-        images = inputs["image"].to(dtype=torch.float16, device='cuda', non_blocking=True)
     else:
-        input_ids = tokenizer.apply_chat_template(
-            conversation,
-            add_generation_prompt=True,
-            return_tensors="pt"
-        ).to(model.device)
-        images = None
+        image = None
+
+    prompt = processor.tokenizer.apply_chat_template(conversation, tokenize=False, add_generation_prompt=True)
+    inputs = processor(prompt, [image] if image else None, return_tensors="pt").to(0)
 
     generate_kwargs = dict(
-        input_ids=input_ids,
         max_new_tokens=max_new_tokens,
         temperature=temperature,
         do_sample=True,
-        num_beams=1,
-        eos_token_id=terminators,
-        images=images
+        eos_token_id=eos_token_id,
     )
     if temperature == 0:
         generate_kwargs["do_sample"] = False
-    output_ids=model.generate(**generate_kwargs)
-    input_token_len = input_ids.shape[1]
-    outputs = tokenizer.batch_decode(output_ids[:, input_token_len:], skip_special_tokens=True)[0]
-    outputs = outputs.strip()
-
-    for i in range(len(outputs)):
-        time.sleep(0.05)
-        yield outputs[: i + 1]
+    streamer = TextIteratorStreamer(processor, skip_special_tokens=True, skip_prompt=True, clean_up_tokenization_spaces=False)
+    generate_kwargs = {**inputs, **generate_kwargs, "streamer": streamer}
+
+    thread = Thread(target=model.generate, kwargs=generate_kwargs)
+    thread.start()
+
+    buffer = ""
+    for new_text in streamer:
+        buffer += new_text
+        yield buffer
 
 
 chatbot = gr.Chatbot(height=450)
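The new prompt path renders the chat template to a string first (`tokenize=False`) and lets the processor pair the `<|image_1|>` placeholder in that string with the actual pixels. A minimal sketch of the call pattern, assuming a Phi-3-vision-style checkpoint (the Space's real `MODEL_ID` is defined earlier in app.py and is not shown in this diff):

```python
from PIL import Image
from transformers import AutoProcessor

# Hypothetical checkpoint whose chat template understands <|image_1|> tags;
# chosen only for illustration, not confirmed as the Space's model.
MODEL_ID = "microsoft/Phi-3-vision-128k-instruct"
processor = AutoProcessor.from_pretrained(MODEL_ID, trust_remote_code=True)

conversation = [
    {"role": "user", "content": "<|image_1|>\nWhat is shown in this picture?"},
]
# Render the template to plain text, then tokenize text and image together.
prompt = processor.tokenizer.apply_chat_template(
    conversation, tokenize=False, add_generation_prompt=True
)
image = Image.open("example.jpg").convert("RGB")
inputs = processor(prompt, [image], return_tensors="pt")
```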
 
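The deleted tail of `stream_chat` (sleep 50 ms, yield a growing slice of an already finished string) only simulated streaming. The replacement is the standard transformers pattern: `generate` runs on a worker thread while `TextIteratorStreamer` yields decoded text as tokens are produced. The pattern in isolation, using a small text-only checkpoint as a stand-in for the Space's vision model:

```python
from threading import Thread
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer

# Small text-only stand-in model, just to demonstrate the streaming mechanics.
tok = AutoTokenizer.from_pretrained("gpt2")
model = AutoModelForCausalLM.from_pretrained("gpt2")

inputs = tok("The quick brown fox", return_tensors="pt")
streamer = TextIteratorStreamer(tok, skip_prompt=True, skip_special_tokens=True)

# generate() blocks until completion, so it runs on a worker thread while the
# caller consumes decoded text from the streamer as it arrives.
thread = Thread(
    target=model.generate,
    kwargs=dict(**inputs, max_new_tokens=40, do_sample=False, streamer=streamer),
)
thread.start()

buffer = ""
for new_text in streamer:  # iteration ends when generation finishes
    buffer += new_text
    print(buffer)
thread.join()
```

Crucially, the streamer has to reach `generate` (here via the `streamer` kwarg); a streamer that is created but never passed in produces no output.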