BoyuNLP committed on
Commit
d8b7bfc
·
1 Parent(s): d2bd1f7
llava/conversation.py CHANGED
@@ -162,7 +162,7 @@ class Conversation:
162
  images.append(image)
163
  return images
164
 
165
- def to_gradio_chatbot(self,extra_image=None):
166
  ret = []
167
  for i, (role, msg) in enumerate(reversed(self.messages[self.offset:])):
168
  if role==self.roles[0]:
@@ -195,6 +195,8 @@ class Conversation:
195
  image_format='JPEG')
196
  img_str = f'<img src="data:image/jpeg;base64,{img_b64_str}" alt="user upload image" />'
197
  msg=img_str
 
 
198
  ret.append([msg, None])
199
  break
200
  return ret
 
162
  images.append(image)
163
  return images
164
 
165
+ def to_gradio_chatbot(self,extra_image=None,extra_coordinates=None):
166
  ret = []
167
  for i, (role, msg) in enumerate(reversed(self.messages[self.offset:])):
168
  if role==self.roles[0]:
 
195
  image_format='JPEG')
196
  img_str = f'<img src="data:image/jpeg;base64,{img_b64_str}" alt="user upload image" />'
197
  msg=img_str
198
+ if not extra_coordinates:
199
+ msg=f"The element is at {extra_coordinates} on the screen: " +msg
200
  ret.append([msg, None])
201
  break
202
  return ret
llava/serve/gradio_web_server.py CHANGED
@@ -70,19 +70,6 @@ from PIL import Image, ImageDraw
70
 
71
 
72
  def draw_circle_on_image(image, x, y, radius=20, color=(255, 0, 0)):
73
- """
74
- 在给定的图片上绘制一个红色圆圈,并返回新的图片。如果 x, y 坐标不在图片范围内,
75
- 并且 y 超出了图片高度,则尝试将 y 减去 224;如果调整后的 y 仍然超出范围,则返回原图。
76
-
77
- 参数:
78
- - image: 传入的 PIL.Image 对象
79
- - x, y: 圆心的绝对坐标
80
- - radius: 圆圈的半径,默认为 10
81
- - color: 圆圈的颜色,默认为红色 (255, 0, 0)
82
-
83
- 返回:
84
- - 带有红色圆圈的 PIL.Image 对象,或者在坐标不合法时返回原图。
85
- """
86
  # 获取图片的宽度和高度
87
  img_width, img_height = image.size
88
 
@@ -108,9 +95,9 @@ def draw_circle_on_image(image, x, y, radius=20, color=(255, 0, 0)):
108
  right_down_point = (x + radius, y + radius)
109
 
110
  # 绘制圆圈 (outline 参数设置圆圈的颜色,width 设置线条粗细)
111
- draw.ellipse([left_up_point, right_down_point], outline=color, width=2)
112
 
113
- return image
114
 
115
  def get_conv_log_filename():
116
  t = datetime.datetime.now()
@@ -391,9 +378,9 @@ def http_bot(state, model_selector, temperature, top_p, max_new_tokens, request:
391
  if len(all_images) > 0:
392
  # 假设我们对第一张图片进行 resize 并展示
393
 
394
- resized_image = draw_circle_on_image(resize_image(all_images[0]),original_coord[0],original_coord[1])
395
  # state.append_message(state.roles[1], ("", resized_image,"Default"))
396
- yield (state, state.to_gradio_chatbot(resized_image)) + (enable_btn,) * 5
397
 
398
  with open(get_conv_log_filename(), "a") as fout:
399
  data = {
 
70
 
71
 
72
  def draw_circle_on_image(image, x, y, radius=20, color=(255, 0, 0)):
 
 
 
 
 
 
 
 
 
 
 
 
 
73
  # 获取图片的宽度和高度
74
  img_width, img_height = image.size
75
 
 
95
  right_down_point = (x + radius, y + radius)
96
 
97
  # 绘制圆圈 (outline 参数设置圆圈的颜色,width 设置线条粗细)
98
+ draw.ellipse([left_up_point, right_down_point], outline=color, width=5)
99
 
100
+ return image,(x,y)
101
 
102
  def get_conv_log_filename():
103
  t = datetime.datetime.now()
 
378
  if len(all_images) > 0:
379
  # 假设我们对第一张图片进行 resize 并展示
380
 
381
+ resized_image,coordinates = draw_circle_on_image(resize_image(all_images[0]),original_coord[0],original_coord[1])
382
  # state.append_message(state.roles[1], ("", resized_image,"Default"))
383
+ yield (state, state.to_gradio_chatbot(resized_image,coordinates)) + (enable_btn,) * 5
384
 
385
  with open(get_conv_log_filename(), "a") as fout:
386
  data = {
pyproject.toml CHANGED
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
 
5
  [project]
6
  name = "uground_demo_test"
7
- version = "3.7"
8
  description = "Navigating the Digital World as Humans Do: Universal Visual Grounding for GUI Agents"
9
  readme = "README.md"
10
  requires-python = ">=3.8"
 
4
 
5
  [project]
6
  name = "uground_demo_test"
7
+ version = "3.9"
8
  description = "Navigating the Digital World as Humans Do: Universal Visual Grounding for GUI Agents"
9
  readme = "README.md"
10
  requires-python = ">=3.8"