vilarin committed on
Commit
b6ce32d
1 Parent(s): 0b2f4ea

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +24 -6
app.py CHANGED
@@ -3,7 +3,8 @@ import torch
3
  from PIL import Image
4
  import gradio as gr
5
  import spaces
6
- from transformers import AutoModelForCausalLM, AutoTokenizer, AutoConfig, TextIteratorStreamer
 
7
  import os
8
  from huggingface_hub import hf_hub_download
9
 
@@ -93,13 +94,21 @@ def stream_chat(message, history: list, system: str, temperature: float, max_new
93
  if message["files"]:
94
  image = Image.open(message["files"][0]).convert('RGB')
95
  # Process the conversation text
96
- inputs = model.build_conversation_input_ids(tokenizer, query=message['text'], image=image, image_processor=image_processor)
 
 
 
 
 
97
  input_ids = inputs["input_ids"].to(device='cuda', non_blocking=True)
98
  images = inputs["image"].to(dtype=torch.float16, device='cuda', non_blocking=True)
99
  else:
100
- input_ids = tokenizer.apply_chat_template(conversation, add_generation_prompt=True, return_tensors="pt").to(model.device)
 
 
 
 
101
  images = None
102
-
103
  streamer = TextIteratorStreamer(tokenizer, timeout=10.0, skip_prompt=True, skip_special_tokens=True)
104
 
105
  generate_kwargs = dict(
@@ -116,10 +125,19 @@ def stream_chat(message, history: list, system: str, temperature: float, max_new
116
 
117
  t = Thread(target=model.generate, kwargs=generate_kwargs)
118
  t.start()
 
 
119
  output = ""
120
  for new_token in streamer:
121
- output += new_token
122
- yield output
 
 
 
 
 
 
 
123
 
124
 
125
  chatbot = gr.Chatbot(height=450)
 
3
  from PIL import Image
4
  import gradio as gr
5
  import spaces
6
+ from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
7
+ from huggingface_hub.inference._generated.types import TextGenerationStreamOutput, TextGenerationStreamOutputToken
8
  import os
9
  from huggingface_hub import hf_hub_download
10
 
 
94
  if message["files"]:
95
  image = Image.open(message["files"][0]).convert('RGB')
96
  # Process the conversation text
97
+ inputs = model.build_conversation_input_ids(
98
+ tokenizer,
99
+ query=message['text'],
100
+ image=image,
101
+ image_processor=image_processor,
102
+ )
103
  input_ids = inputs["input_ids"].to(device='cuda', non_blocking=True)
104
  images = inputs["image"].to(dtype=torch.float16, device='cuda', non_blocking=True)
105
  else:
106
+ input_ids = tokenizer.apply_chat_template(
107
+ conversation,
108
+ add_generation_prompt=True,
109
+ return_tensors="pt"
110
+ ).to(model.device)
111
  images = None
 
112
  streamer = TextIteratorStreamer(tokenizer, timeout=10.0, skip_prompt=True, skip_special_tokens=True)
113
 
114
  generate_kwargs = dict(
 
125
 
126
  t = Thread(target=model.generate, kwargs=generate_kwargs)
127
  t.start()
128
+ input_token_len = input_ids.shape[1]
129
+
130
  output = ""
131
  for new_token in streamer:
132
+ yield TextGenerationStreamOutput(
133
+ index=0,
134
+ token=TextGenerationStreamOutputToken(
135
+ id=0,
136
+ logprob=0,
137
+ text=next_text,
138
+ special=False,
139
+ )
140
+ )
141
 
142
 
143
  chatbot = gr.Chatbot(height=450)