Tonic committed
Commit d0aa198
1 Parent(s): 215954f

Update app.py

Files changed (1):
  app.py (+41 -10)

app.py CHANGED
```diff
@@ -2,20 +2,23 @@ import gradio as gr
 from transformers import AutoModelForCausalLM, AutoTokenizer
 from transformers.generation import GenerationConfig
 import re
+import copy
 from pathlib import Path
 import secrets
 import torch
+from PIL import Image, ImageDraw
 
-# Initialize the model and tokenizer
 model_name = "qwen/Qwen-VL-Chat"
 tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
 model = AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=True).eval()
 model.generation_config = GenerationConfig.from_pretrained(model_name, trust_remote_code=True)
 
-# Set device for model
 device = "cuda" if torch.cuda.is_available() else "cpu"
 model.to(device)
 
+BOX_TAG_PATTERN = r"<box>([\s\S]*?)</box>"
+PUNCTUATION = "!?。"#$%&'()*+,-/:;<=>@[\]^_`{|}~⦅⦆「」、、〃》「」『』【】〔〕〖〗〘〙〚〛〜〝〞〟〰〾〿–—‘’‛“”„‟…‧﹏."
+
 def save_image(image_file, upload_dir: str) -> str:
     Path(upload_dir).mkdir(parents=True, exist_ok=True)
     filename = secrets.token_hex(10) + Path(image_file.name).suffix
```
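
One observation on this hunk: `BOX_TAG_PATTERN` is added here but never referenced later in the file (`draw_boxes` below builds its own inline regex). A minimal sketch of what the constant matches, using a made-up response string in Qwen-VL's `<ref>`/`<box>` grounding style (the sample text and coordinates are illustrative, not real model output):

```python
import re

BOX_TAG_PATTERN = r"<box>([\s\S]*?)</box>"

# Hypothetical Qwen-VL style response; coordinates are made up.
sample = "<ref>a dog</ref><box>(120,200),(480,560)</box>"

# re.findall with one capture group returns the inner payload of
# each <box>...</box> span.
print(re.findall(BOX_TAG_PATTERN, sample))  # ['(120,200),(480,560)']
```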
```diff
@@ -29,27 +32,51 @@ def clean_response(response: str) -> str:
     return response
 
 def chat_with_model(image_path=None, text_query=None, history=None):
+    # Modify this function to use 'history' if your model requires it
     query_elements = []
     if image_path:
         query_elements.append({'image': image_path})
     if text_query:
         query_elements.append({'text': text_query})
-
+    # Add history processing here if needed
     query = tokenizer.from_list_format(query_elements)
     tokenized_inputs = tokenizer(query, return_tensors='pt').to(device)
     output = model.generate(**tokenized_inputs)
     response = tokenizer.decode(output[0], skip_special_tokens=True)
     cleaned_response = clean_response(response)
     return cleaned_response
+def draw_boxes(image_path, response):
+    image = Image.open(image_path)
+    draw = ImageDraw.Draw(image)
+    boxes = re.findall(r'<box>\((\d+),(\d+)\),\((\d+),(\d+)\)</box>', response)
+    for box in boxes:
+        x1, y1, x2, y2 = map(int, box)
+        draw.rectangle([x1, y1, x2, y2], outline="red", width=3)
+    return image
 
-def process_input(text, file):
+def process_input(text=None, file=None, task_history=None):
+    if task_history is None:
+        task_history = []
     image_path = None
     if file is not None:
         image_path = save_image(file, "uploaded_images")
-    response = chat_with_model(image_path=image_path, text_query=text)
-    # Format the response for the Chatbot component
-    return [("bot", response)]
+    response = chat_with_model(image_path=image_path, text_query=text, history=task_history)
+    task_history.append((text, response))
+
+    if "<box>" in response:
+        if image_path:
+            image_with_boxes = draw_boxes(image_path, response)
+            image_with_boxes_path = image_path.replace(".jpg", "_boxed.jpg")
+            image_with_boxes.save(image_with_boxes_path)
+            return [("bot", response), "image", image_with_boxes_path], task_history
+        else:
+            return [("bot", response), "text", None], task_history
+    else:
+        # Clean the response if it contains any box-like annotations
+        clean_response = re.sub(r'<ref>(.*?)</ref>(?:<box>.*?</box>)*(?:<quad>.*?</quad>)*', r'\1', response).strip()
+        return [("bot", clean_response), "text", None], task_history
 
+# Define Gradio interface
 with gr.Blocks() as demo:
     gr.Markdown("""
 # 🙋🏻‍♂️欢迎来到🌟Tonic 的🦆Qwen-VL-Chat🤩Bot!🚀
```
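
A caveat on the new `process_input` return value: with Gradio's tuple-based `gr.Chatbot`, the first element of each pair renders as the user turn, so `("bot", response)` displays the literal string `bot` as if the user had typed it, and the trailing `"image"` / file-path items are not valid pair entries. A sketch of a return shape the component does accept, assuming the user's own text should appear on the left (the helper name `format_for_chatbot` is hypothetical):

```python
def format_for_chatbot(text, response, image_with_boxes_path=None):
    # gr.Chatbot (tuple format) expects a list of (user, bot) pairs.
    chat_pairs = [(text, response)]
    if image_with_boxes_path:
        # A one-element (filepath,) tuple in a slot renders as an image.
        chat_pairs.append((None, (image_with_boxes_path,)))
    return chat_pairs
```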
```diff
@@ -69,10 +96,12 @@ Join us: TeamTonic is always making cool demos! Join our active builder's comm
     file_upload = gr.File(label="Upload Image")
     submit_btn = gr.Button("Submit")
 
+    task_history = []
+
     submit_btn.click(
         fn=process_input,
-        inputs=[query, file_upload],
-        outputs=chatbot
+        inputs=[query, file_upload, task_history],
+        outputs=[chatbot, task_history]
     )
 
     gr.Markdown("""
```
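
Separately, the `task_history = []` introduced in this hunk is a plain Python list, but `Button.click` only accepts Gradio components in `inputs=`/`outputs=`, so wiring the raw list in will fail (and a module-level list would be shared across all sessions regardless). The usual fix is `gr.State`; a sketch against the component names used in this file (the constructors shown are assumptions, since the diff does not include them):

```python
with gr.Blocks() as demo:
    chatbot = gr.Chatbot()                       # assumed constructor
    query = gr.Textbox(label="Your Query")       # assumed constructor
    file_upload = gr.File(label="Upload Image")
    submit_btn = gr.Button("Submit")

    # gr.State holds a per-session value that round-trips through
    # the event handler like any other input/output.
    task_history = gr.State([])

    submit_btn.click(
        fn=process_input,
        inputs=[query, file_upload, task_history],
        outputs=[chatbot, task_history],
    )
```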
```diff
@@ -81,5 +110,7 @@ Join us: TeamTonic is always making cool demos! Join our active builder's comm
 Note: This demo is governed by the original license of Qwen-VL. We strongly advise users not to knowingly generate or allow others to knowingly generate harmful content,
 including hate speech, violence, pornography, deception, etc. (Note: This demo is subject to the license agreement of Qwen-VL. We strongly advise users not to disseminate or allow others to disseminate the following content, including but not limited to hate speech, violence, pornography, and fraud-related harmful information.)
 """)
-
 demo.queue().launch()
+
+if __name__ == "__main__":
+    demo.launch()
```
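
Note that `demo.queue().launch()` already runs when the script executes, so the new `if __name__ == "__main__": demo.launch()` block appears to call `launch()` a second time on the same app. A sketch of the usual single-entry-point arrangement that keeps the queue:

```python
# Launch exactly once, from the script entry point, with queuing enabled.
if __name__ == "__main__":
    demo.queue().launch()
```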
 
 
 
 
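Finally, a note on `draw_boxes` from the second hunk: Qwen-VL's grounding format reports box corners on a 0-1000 normalized grid rather than in pixels, so drawing the raw values is only correct for a 1000x1000 image. A sketch of the rescaling step (same inline regex as the diff; the 1000 constant follows the Qwen-VL documentation):

```python
from PIL import Image, ImageDraw
import re

def draw_boxes_scaled(image_path, response):
    image = Image.open(image_path)
    draw = ImageDraw.Draw(image)
    width, height = image.size
    for box in re.findall(r'<box>\((\d+),(\d+)\),\((\d+),(\d+)\)</box>', response):
        x1, y1, x2, y2 = map(int, box)
        # Map the 0-1000 normalized coordinates onto actual pixels.
        draw.rectangle(
            [x1 * width / 1000, y1 * height / 1000,
             x2 * width / 1000, y2 * height / 1000],
            outline="red", width=3,
        )
    return image
```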