maxiw committed on
Commit
3ba2ba9
1 Parent(s): 0da7bd3

add example

Files changed (2)
  1. app.py +17 -5
  2. assets/image2.jpg +0 -0
app.py CHANGED
@@ -22,8 +22,8 @@ DESCRIPTION = "# Qwen2-VL Object Localization Demo"
 
 def image_to_base64(image):
     buffered = BytesIO()
-    image.save(buffered, format="PNG")  # Save the image in memory as PNG
-    img_str = base64.b64encode(buffered.getvalue()).decode("utf-8")  # Encode image to base64
+    image.save(buffered, format="PNG")
+    img_str = base64.b64encode(buffered.getvalue()).decode("utf-8")
     return img_str
 
 
@@ -101,21 +101,33 @@ css = """
     border: 1px solid #ccc;
 }
 """
+default_system_prompt = "You are a helpful assistant for detecting objects in images. When asked to detect elements based on a description, you return bounding boxes for all elements in the form of [xmin, ymin, xmax, ymax], with the values scaled to 1000 by 1000 pixels. When there is more than one result, answer with a list of bounding boxes in the form of [[xmin, ymin, xmax, ymax], [xmin, ymin, xmax, ymax], ...]."
 
 with gr.Blocks(css=css) as demo:
     gr.Markdown(DESCRIPTION)
     with gr.Tab(label="Qwen2-VL Input"):
         with gr.Row():
             with gr.Column():
-                input_img = gr.Image(label="Input Picture", type="pil")
+                input_img = gr.Image(label="Input Image", type="pil")
                 model_selector = gr.Dropdown(choices=list(models.keys()), label="Model", value="Qwen/Qwen2-VL-7B-Instruct")
-                system_prompt = gr.Textbox(label="System Prompt", value="You are a helpful assistant for detecting objects in images. When asked to detect elements based on a description, you return bounding boxes for all elements in the form of [xmin, ymin, xmax, ymax], with the values scaled to 1000 by 1000 pixels. When there is more than one result, answer with a list of bounding boxes in the form of [[xmin, ymin, xmax, ymax], [xmin, ymin, xmax, ymax], ...].")
+                system_prompt = gr.Textbox(label="System Prompt", value=default_system_prompt)
                 text_input = gr.Textbox(label="Description of Localization Target")
                 submit_btn = gr.Button(value="Submit")
             with gr.Column():
                 model_output_text = gr.Textbox(label="Model Output Text")
                 parsed_boxes = gr.Textbox(label="Parsed Boxes")
-                annotated_image = gr.Image(label="Annotated Picture")
+                annotated_image = gr.Image(label="Annotated Image")
+
+        gr.Examples(
+            examples=[
+                ["assets/image2.jpg", "orange button", default_system_prompt],
+            ],
+            inputs=[input_img, text_input, system_prompt],
+            outputs=[model_output_text, parsed_boxes, annotated_image],
+            fn=run_example,
+            cache_examples=True,
+            label="Try examples"
+        )
 
         submit_btn.click(run_example, [input_img, text_input, system_prompt, model_selector], [model_output_text, parsed_boxes, annotated_image])
 
assets/image2.jpg ADDED
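A note on the gr.Examples block: with cache_examples=True, Gradio runs run_example on the listed example once at startup and stores the outputs, so visitors clicking the example see the cached annotated image rather than triggering a fresh model call.

The system prompt asks the model for coordinates on a fixed 1000 by 1000 grid, so the boxes must be rescaled to the real image dimensions before drawing. A minimal sketch of that post-processing, assuming the model's text output matches the prompted format; parse_boxes and draw_boxes here are illustrative helpers, not the app's actual run_example:

import ast
import re

from PIL import Image, ImageDraw

def parse_boxes(text):
    # Pull every [xmin, ymin, xmax, ymax] group out of the raw model text;
    # works for a single box or a list of boxes.
    groups = re.findall(r"\[\s*\d+\s*,\s*\d+\s*,\s*\d+\s*,\s*\d+\s*\]", text)
    return [ast.literal_eval(g) for g in groups]

def draw_boxes(image, boxes, grid=1000):
    # Rescale from the prompt's 1000x1000 grid to actual pixel coordinates.
    sx, sy = image.width / grid, image.height / grid
    draw = ImageDraw.Draw(image)
    for xmin, ymin, xmax, ymax in boxes:
        draw.rectangle([xmin * sx, ymin * sy, xmax * sx, ymax * sy], outline="red", width=3)
    return image

model_output = "[[104, 612, 180, 680]]"  # hypothetical model answer
annotated = draw_boxes(Image.open("assets/image2.jpg"), parse_boxes(model_output))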