Tonic committed on
Commit
cac8b0e
1 Parent(s): 8136693

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +52 -279
app.py CHANGED
@@ -1,111 +1,20 @@
1
-
2
- from modelscope import AutoModelForCausalLM, AutoTokenizer, GenerationConfig, snapshot_download
3
- from argparse import ArgumentParser
4
- from pathlib import Path
5
- import shutil
6
- import copy
7
  import gradio as gr
8
- import os
 
9
  import re
 
10
  import secrets
11
- import tempfile
12
-
13
- #GlobalVariables
14
- os.environ['CUDA_VISIBLE_DEVICES'] = '0,1'
15
- DEFAULT_CKPT_PATH = 'qwen/Qwen-VL-Chat'
16
- REVISION = 'v1.0.4'
17
- BOX_TAG_PATTERN = r"<box>([\s\S]*?)</box>"
18
- PUNCTUATION = "!?。"#$%&'()*+,-/:;<=>@[\]^_`{|}~⦅⦆「」、、〃》「」『』【】〔〕〖〗〘〙〚〛〜〝〞〟〰〾〿–—‘’‛“”„‟…‧﹏."
19
- uploaded_file_dir = os.environ.get("GRADIO_TEMP_DIR") or str(Path(tempfile.gettempdir()) / "gradio")
20
- tokenizer = None
21
- model = None
22
-
23
- def _get_args() -> ArgumentParser:
24
- parser = ArgumentParser()
25
- parser.add_argument("-c", "--checkpoint-path", type=str, default=DEFAULT_CKPT_PATH,
26
- help="Checkpoint name or path, default to %(default)r")
27
- parser.add_argument("--revision", type=str, default=REVISION)
28
- parser.add_argument("--cpu-only", action="store_true", help="Run demo with CPU only")
29
-
30
- parser.add_argument("--share", action="store_true", default=False,
31
- help="Create a publicly shareable link for the interface.")
32
- parser.add_argument("--inbrowser", action="store_true", default=False,
33
- help="Automatically launch the interface in a new tab on the default browser.")
34
- parser.add_argument("--server-port", type=int, default=8000,
35
- help="Demo server port.")
36
- parser.add_argument("--server-name", type=str, default="127.0.0.1",
37
- help="Demo server name.")
38
 
39
- args = parser.parse_args()
40
- return args
 
 
 
41
 
42
- def handle_image_submission(_chatbot, task_history, file) -> tuple:
43
- if file is None:
44
- return _chatbot, task_history
45
- file_path = save_image(file, uploaded_file_dir)
46
- history_item = ((file_path,), None)
47
- _chatbot.append(history_item)
48
- task_history.append(history_item)
49
- return predict(_chatbot, task_history)
50
-
51
-
52
- def _load_model_tokenizer(args) -> tuple:
53
- global tokenizer, model
54
- model_id = args.checkpoint_path
55
- model_dir = snapshot_download(model_id, revision=args.revision)
56
- tokenizer = AutoTokenizer.from_pretrained(
57
- model_dir, trust_remote_code=True, resume_download=True,
58
- )
59
-
60
- if args.cpu_only:
61
- device_map = "cpu"
62
- else:
63
- device_map = "auto"
64
-
65
- model = AutoModelForCausalLM.from_pretrained(
66
- model_dir,
67
- device_map=device_map,
68
- trust_remote_code=True,
69
- bf16=True,
70
- resume_download=True,
71
- ).eval()
72
- model.generation_config = GenerationConfig.from_pretrained(
73
- model_dir, trust_remote_code=True, resume_download=True,
74
- )
75
-
76
- return model, tokenizer
77
-
78
-
79
- def _parse_text(text: str) -> str:
80
- lines = text.split("\n")
81
- lines = [line for line in lines if line != ""]
82
- count = 0
83
- for i, line in enumerate(lines):
84
- if "```" in line:
85
- count += 1
86
- items = line.split("`")
87
- if count % 2 == 1:
88
- lines[i] = f'<pre><code class="language-{items[-1]}">'
89
- else:
90
- lines[i] = f"<br></code></pre>"
91
- else:
92
- if i > 0:
93
- if count % 2 == 1:
94
- line = line.replace("`", r"\`")
95
- line = line.replace("<", "&lt;")
96
- line = line.replace(">", "&gt;")
97
- line = line.replace(" ", "&nbsp;")
98
- line = line.replace("*", "&ast;")
99
- line = line.replace("_", "&lowbar;")
100
- line = line.replace("-", "&#45;")
101
- line = line.replace(".", "&#46;")
102
- line = line.replace("!", "&#33;")
103
- line = line.replace("(", "&#40;")
104
- line = line.replace(")", "&#41;")
105
- line = line.replace("$", "&#36;")
106
- lines[i] = "<br>" + line
107
- text = "".join(lines)
108
- return text
109
 
110
  def save_image(image_file, upload_dir: str) -> str:
111
  Path(upload_dir).mkdir(parents=True, exist_ok=True)
@@ -115,131 +24,33 @@ def save_image(image_file, upload_dir: str) -> str:
115
  f_output.write(f_input.read())
116
  return str(file_path)
117
 
118
-
119
- def add_file(history, task_history, file):
120
- if file is None:
121
- return history, task_history
122
- file_path = save_image(file)
123
- history = history + [((file_path,), None)]
124
- task_history = task_history + [((file_path,), None)]
125
- return history, task_history
126
-
127
- def predict(_chatbot, task_history) -> tuple:
128
- if not _chatbot:
129
- return _chatbot, task_history
130
- chat_query, chat_response = _chatbot[-1]
131
- print("predict called")
132
- if isinstance(chat_query, tuple):
133
- chat_query = chat_query[0]
134
- query = [{'image': chat_query}]
135
- else:
136
- query = [{'text': _parse_text(chat_query)}]
137
- inputs = tokenizer.from_list_format(query)
138
- tokenized_inputs = tokenizer(inputs, return_tensors='pt')
139
- tokenized_inputs = tokenized_inputs.to(model.device)
140
- pred = model.generate(**tokenized_inputs)
141
- response = tokenizer.decode(pred.cpu()[0], skip_special_tokens=False)
142
- if 'image' in query[0]:
143
- print("Model response:", response)
144
- image = tokenizer.draw_bbox_on_latest_picture(response)
145
- if image is not None:
146
- image_path = save_image(image, uploaded_file_dir)
147
- formatted_response = (chat_query, image_path)
148
- else:
149
- formatted_response = (chat_query, response)
150
- else:
151
- text_response = response.strip()
152
- formatted_response = (chat_query, text_response)
153
-
154
- _chatbot[-1] = formatted_response
155
- if task_history:
156
- task_history[-1] = formatted_response
157
- else:
158
- task_history.append(formatted_response)
159
-
160
- return _chatbot, task_history
161
-
162
- def save_uploaded_image(image_file, upload_dir):
163
- if image is None:
164
- return None
165
- temp_dir = secrets.token_hex(20)
166
- temp_dir = Path(uploaded_file_dir) / temp_dir
167
- temp_dir.mkdir(exist_ok=True, parents=True)
168
- name = f"tmp{secrets.token_hex(5)}.jpg"
169
- filename = temp_dir / name
170
- image.save(str(filename))
171
- return str(filename)
172
-
173
- def regenerate(_chatbot, task_history) -> list:
174
- if not task_history:
175
- return _chatbot
176
- item = task_history[-1]
177
- if item[1] is None:
178
- return _chatbot
179
- task_history[-1] = (item[0], None)
180
- chatbot_item = _chatbot.pop(-1)
181
- if chatbot_item[0] is None:
182
- _chatbot[-1] = (_chatbot[-1][0], None)
183
- else:
184
- _chatbot.append((chatbot_item[0], None))
185
- return predict(_chatbot, task_history)
186
-
187
- def add_text(history, task_history, text) -> tuple:
188
- if not text.strip():
189
- return history, task_history, chatbot
190
- if not any(isinstance(item[0], tuple) for item in history):
191
- prompt = "Please upload and submit an image to get started."
192
- history.append((prompt, None))
193
- task_history.append((prompt, None))
194
- chatbot.append(prompt)
195
- return history, task_history, chatbot
196
- task_text = text
197
- if len(text) >= 2 and text[-1] in PUNCTUATION and text[-2] not in PUNCTUATION:
198
- task_text = text[:-1]
199
- history_item = (_parse_text(task_text), None)
200
- history.append(history_item)
201
- task_history.append(history_item)
202
- return history, task_history, chatbot
203
-
204
- def add_file(history, task_history, file):
205
- if file is None:
206
- return history, task_history # Return if no file is uploaded
207
- file_path = file.name
208
- history = history + [((file.name,), None)]
209
- task_history = task_history + [((file.name,), None)]
210
- return history, task_history
211
-
212
- def reset_user_input():
213
- return gr.update(value="")
214
-
215
- def process_response(response: str) -> str:
216
- response = response.replace("<ref>", "").replace(r"</ref>", "")
217
- response = re.sub(BOX_TAG_PATTERN, "", response)
218
  return response
219
-
220
- def process_history_for_model(task_history) -> list:
221
- processed_history = []
222
- for query, response in task_history:
223
- if isinstance(query, tuple):
224
- query = {'image': query[0]}
225
- else:
226
- query = {'text': query}
227
- response = response or ""
228
- processed_history.append((query, response))
229
- return processed_history
230
 
231
- def reset_state(task_history) -> list:
232
- task_history.clear()
233
- return []
234
-
235
-
236
- def _launch_demo(args, model, tokenizer):
237
- uploaded_file_dir = os.environ.get("GRADIO_TEMP_DIR") or str(
238
- Path(tempfile.gettempdir()) / "gradio"
239
- )
 
 
 
 
 
 
 
 
 
 
 
240
 
241
- with gr.Blocks() as demo:
242
- gr.Markdown("""
243
  # 🙋🏻‍♂️欢迎来到🌟Tonic 的🦆Qwen-VL-Chat🤩Bot!🚀
244
  # 🙋🏻‍♂️Welcome toTonic's Qwen-VL-Chat Bot!
245
  该WebUI基于Qwen-VL-Chat,实现聊天机器人功能。 但我必须解决它的很多问题,也许我也能获得一些荣誉。
@@ -248,51 +59,22 @@ Qwen-VL-Chat 是一种多模式输入模型。 您可以使用此空间来测试
248
  This WebUI is based on Qwen-VL-Chat, implementing chatbot functionalities. Qwen-VL-Chat is a multimodal input model. You can use this Space to test out the current model [qwen/Qwen-VL-Chat](https://huggingface.co/qwen/Qwen-VL-Chat) You can also use qwen/Qwen-VL-Chat🚀 by cloning this space. Simply click here: [Duplicate Space](https://huggingface.co/spaces/Tonic1/VLChat?duplicate=true)
249
  Join us: TeamTonic is always making cool demos! Join our active builder's community on Discord: [Discord](https://discord.gg/nXx5wbX9) On Huggingface: [TeamTonic](https://huggingface.co/TeamTonic) & [MultiTransformer](https://huggingface.co/MultiTransformer) On Github: [Polytonic](https://github.com/tonic-ai) & contribute to [PolyGPT](https://github.com/tonic-ai/polygpt-alpha)
250
  """)
251
- with gr.Row():
252
- with gr.Column(scale=1):
253
- chatbot = gr.Chatbot(label='🦆Qwen-VL-Chat')
254
- with gr.Column(scale=1):
255
- with gr.Row():
256
- query = gr.Textbox(lines=2, label='Input', placeholder="Type your message here...")
257
- submit_btn = gr.Button("📨Submit")
258
- with gr.Row():
259
- file_upload = gr.UploadButton("⤴️Upload Image", file_types=["image"])
260
- submit_file_btn = gr.Button("📩Submit Image")
261
- regen_btn = gr.Button("♻️Regenerate")
262
- empty_bin = gr.Button("🧼Clear History")
263
- task_history = gr.State([])
264
-
265
- submit_btn.click(
266
- fn=predict,
267
- inputs=[chatbot, task_history],
268
- outputs=[chatbot]
269
- )
270
-
271
- submit_file_btn.click(
272
- fn=handle_image_submission,
273
- inputs=[chatbot, task_history, file_upload],
274
- outputs=[chatbot, task_history]
275
- )
276
-
277
- regen_btn.click(
278
- fn=regenerate,
279
- inputs=[chatbot, task_history],
280
- outputs=[chatbot]
281
- )
282
-
283
- empty_bin.click(
284
- fn=reset_state,
285
- inputs=[task_history],
286
- outputs=[task_history],
287
- )
288
-
289
- query.submit(
290
- fn=add_text,
291
- inputs=[chatbot, task_history, query],
292
- outputs=[chatbot, task_history, query]
293
- )
294
-
295
- gr.Markdown("""
296
  注意:此演示受 Qwen-VL 原始许可证的约束。我们强烈建议用户不要故意生成或允许他人故意生成有害内容,
297
  包括仇恨言论、暴力、色情、欺骗等。(注:本演示受Qwen-VL许可协议约束,强烈建议用户不要传播或允许他人传播以下内容,包括但不限于仇恨言论、暴力、色情、欺诈相关的有害信息 .)
298
  Note: This demo is governed by the original license of Qwen-VL. We strongly advise users not to knowingly generate or allow others to knowingly generate harmful content,
@@ -300,12 +82,3 @@ including hate speech, violence, pornography, deception, etc. (Note: This demo i
300
  """)
301
 
302
  demo.queue().launch()
303
-
304
-
305
- def main():
306
- args = _get_args()
307
- model, tokenizer = _load_model_tokenizer(args)
308
- _launch_demo(args, model, tokenizer)
309
-
310
- if __name__ == '__main__':
311
- main()
 
 
 
 
 
 
 
1
  import gradio as gr
2
+ from transformers import AutoModelForCausalLM, AutoTokenizer
3
+ from transformers.generation import GenerationConfig
4
  import re
5
+ from pathlib import Path
6
  import secrets
7
+ import torch
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
 
9
# Checkpoint served by this demo.
model_name = "qwen/Qwen-VL-Chat"

# Prefer the GPU when torch reports one, otherwise stay on the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Tokenizer, model weights and generation defaults all come from the same
# checkpoint; trust_remote_code is passed to each loader.
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)

model = AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=True)
model = model.eval()
model.generation_config = GenerationConfig.from_pretrained(model_name, trust_remote_code=True)

# Move the weights to the selected device once, at import time.
model.to(device)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
 
19
  def save_image(image_file, upload_dir: str) -> str:
20
  Path(upload_dir).mkdir(parents=True, exist_ok=True)
 
24
  f_output.write(f_input.read())
25
  return str(file_path)
26
 
27
# Matches a <ref>...</ref> span plus any run of <box>/<quad> annotations that
# directly follows it. DOTALL lets the tagged text and the annotations span
# line breaks; the alternation accepts boxes and quads in any order.
_REF_MARKUP = re.compile(
    r'<ref>(.*?)</ref>(?:<box>.*?</box>|<quad>.*?</quad>)*',
    re.DOTALL,
)


def clean_response(response: str) -> str:
    """Strip grounding markup from a model response.

    Each ``<ref>text</ref>`` span is replaced by its inner ``text``, and any
    ``<box>...</box>`` / ``<quad>...</quad>`` annotations immediately after it
    are dropped. Leading and trailing whitespace is removed from the result.

    Args:
        response: Raw decoded model output.

    Returns:
        The response with ``<ref>`` wrappers and their trailing box/quad
        annotations removed.
    """
    return _REF_MARKUP.sub(r'\1', response).strip()
 
 
 
 
 
 
 
 
 
 
 
30
 
31
def chat_with_model(image_path=None, text_query=None, history=None):
    """Run a single image/text query through the model and return its reply.

    Args:
        image_path: Optional path of an image to include in the query.
        text_query: Optional text to include in the query.
        history: Accepted for interface compatibility; not used here.

    Returns:
        The cleaned response string, or a short prompt message when neither
        an image nor any text was supplied.
    """
    query_elements = []
    if image_path:
        query_elements.append({'image': image_path})
    if text_query:
        query_elements.append({'text': text_query})

    # Nothing to ask: do not send an empty prompt to the model.
    if not query_elements:
        return "Please provide an image or a text query."

    query = tokenizer.from_list_format(query_elements)
    tokenized_inputs = tokenizer(query, return_tensors='pt').to(device)
    output = model.generate(**tokenized_inputs)

    # generate() returns the prompt tokens followed by the continuation;
    # decode only the continuation so the reply does not echo the query.
    prompt_length = tokenized_inputs['input_ids'].shape[-1]
    response = tokenizer.decode(output[0][prompt_length:], skip_special_tokens=True)
    return clean_response(response)
44
+
45
def process_input(text, file):
    """Handle a submit click: store the upload (if any) and query the model.

    Args:
        text: Text typed by the user.
        file: Uploaded file object, or None when nothing was uploaded.

    Returns:
        The response string produced by ``chat_with_model``.
    """
    # NOTE(review): this handler's output is wired to a gr.Chatbot component,
    # which presumably expects a list of (user, bot) pairs rather than a bare
    # string -- confirm against the Gradio version in use.
    image_path = save_image(file, "uploaded_images") if file is not None else None
    return chat_with_model(image_path=image_path, text_query=text)
51
 
52
+ with gr.Blocks(theme=ParityError/Anime) as demo:
53
+ gr.Markdown("""
54
  # 🙋🏻‍♂️欢迎来到🌟Tonic 的🦆Qwen-VL-Chat🤩Bot!🚀
55
  # 🙋🏻‍♂️Welcome toTonic's Qwen-VL-Chat Bot!
56
  该WebUI基于Qwen-VL-Chat,实现聊天机器人功能。 但我必须解决它的很多问题,也许我也能获得一些荣誉。
 
59
  This WebUI is based on Qwen-VL-Chat, implementing chatbot functionalities. Qwen-VL-Chat is a multimodal input model. You can use this Space to test out the current model [qwen/Qwen-VL-Chat](https://huggingface.co/qwen/Qwen-VL-Chat) You can also use qwen/Qwen-VL-Chat🚀 by cloning this space. Simply click here: [Duplicate Space](https://huggingface.co/spaces/Tonic1/VLChat?duplicate=true)
60
  Join us: TeamTonic is always making cool demos! Join our active builder's community on Discord: [Discord](https://discord.gg/nXx5wbX9) On Huggingface: [TeamTonic](https://huggingface.co/TeamTonic) & [MultiTransformer](https://huggingface.co/MultiTransformer) On Github: [Polytonic](https://github.com/tonic-ai) & contribute to [PolyGPT](https://github.com/tonic-ai/polygpt-alpha)
61
  """)
62
+ with gr.Row():
63
+ with gr.Column(scale=1):
64
+ chatbot = gr.Chatbot(label='Qwen-VL-Chat')
65
+ with gr.Column(scale=1):
66
+ with gr.Row():
67
+ query = gr.Textbox(lines=2, label='Input', placeholder="Type your message here...")
68
+ file_upload = gr.File(label="Upload Image")
69
+ submit_btn = gr.Button("Submit")
70
+
71
+ submit_btn.click(
72
+ fn=process_input,
73
+ inputs=[query, file_upload],
74
+ outputs=chatbot
75
+ )
76
+
77
+ gr.Markdown("""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
78
  注意:此演示受 Qwen-VL 原始许可证的约束。我们强烈建议用户不要故意生成或允许他人故意生成有害内容,
79
  包括仇恨言论、暴力、色情、欺骗等。(注:本演示受Qwen-VL许可协议约束,强烈建议用户不要传播或允许他人传播以下内容,包括但不限于仇恨言论、暴力、色情、欺诈相关的有害信息 .)
80
  Note: This demo is governed by the original license of Qwen-VL. We strongly advise users not to knowingly generate or allow others to knowingly generate harmful content,
 
82
  """)
83
 
84
  demo.queue().launch()